diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml index b1b3e35e478..7e8fb003317 100644 --- a/.github/workflows/contrib-openai.yml +++ b/.github/workflows/contrib-openai.yml @@ -111,46 +111,7 @@ jobs: with: file: ./coverage.xml flags: unittests - CompressionTest: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.9"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . - python -c "import autogen" - pip install pytest-cov>=5 pytest-asyncio - - name: Install packages for test when needed - run: | - pip install docker - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/test_compressible_agent.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests + GPTAssistantAgent: strategy: matrix: @@ -306,44 +267,7 @@ jobs: with: file: ./coverage.xml flags: unittests - ContextHandling: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.11"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . - python -c "import autogen" - pip install pytest-cov>=5 - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - BING_API_KEY: ${{ secrets.BING_API_KEY }} - run: | - pytest test/agentchat/contrib/capabilities/test_context_handling.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests + ImageGen: strategy: matrix: diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml index a92c700d3b2..810fdbfd778 100644 --- a/.github/workflows/contrib-tests.yml +++ b/.github/workflows/contrib-tests.yml @@ -163,41 +163,6 @@ jobs: file: ./coverage.xml flags: unittests - CompressionTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.10"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Compression - run: | - pip install -e . 
- - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/agentchat/contrib/test_compressible_agent.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - GPTAssistantAgent: runs-on: ${{ matrix.os }} strategy: @@ -384,41 +349,6 @@ jobs: file: ./coverage.xml flags: unittests - ContextHandling: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.11"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Context Handling - run: | - pip install -e . - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/agentchat/contrib/capabilities/test_context_handling.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - TransformMessages: runs-on: ${{ matrix.os }} strategy: @@ -485,7 +415,6 @@ jobs: file: ./coverage.xml flags: unittests - AnthropicTest: runs-on: ${{ matrix.os }} strategy: @@ -684,7 +613,7 @@ jobs: file: ./coverage.xml flags: unittests - OllamaTest: + BedrockTest: runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -706,9 +635,9 @@ jobs: run: | python -m pip install --upgrade pip wheel pip install pytest-cov>=5 - - name: Install packages and dependencies for Ollama + - name: Install packages and dependencies for Amazon Bedrock run: | - pip install -e .[ollama,test] + pip install -e .[boto3,test] - name: Set AUTOGEN_USE_DOCKER based on OS shell: bash run: | @@ -717,9 +646,41 @@ jobs: fi - name: Coverage run: | - pytest test/oai/test_ollama.py --skip-openai + pytest test/oai/test_bedrock.py --skip-openai - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: file: ./coverage.xml flags: unittests + + OllamaTest: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-2019] + python-version: ["3.9", "3.10", "3.11", "3.12"] + exclude: + - os: macos-latest + python-version: "3.9" + steps: + - uses: actions/checkout@v4 + with: + lfs: true + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install packages and dependencies for all tests + run: | + python -m pip install --upgrade pip wheel + pip install pytest-cov>=5 + - name: Install packages and dependencies for Ollama + run: | + pip install -e .[ollama,test] + pytest test/oai/test_ollama.py --skip-openai + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests \ No newline at end of file diff --git a/.github/workflows/dotnet-build.yml b/.github/workflows/dotnet-build.yml index 6b7056cce6d..6aac54d3818 100644 --- a/.github/workflows/dotnet-build.yml +++ b/.github/workflows/dotnet-build.yml @@ -51,12 +51,26 @@ jobs: strategy: fail-fast: 
false matrix: - os: [ ubuntu-latest, macos-latest, windows-latest ] + os: [ ubuntu-latest, macos-latest ] + python-version: ["3.11"] runs-on: ${{ matrix.os }} + timeout-minutes: 30 steps: - uses: actions/checkout@v4 with: lfs: true + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install jupyter and ipykernel + run: | + python -m pip install --upgrade pip + python -m pip install jupyter + python -m pip install ipykernel + - name: list available kernels + run: | + python -m jupyter kernelspec list - name: Setup .NET uses: actions/setup-dotnet@v4 with: @@ -114,6 +128,18 @@ jobs: - uses: actions/checkout@v4 with: lfs: true + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + - name: Install jupyter and ipykernel + run: | + python -m pip install --upgrade pip + python -m pip install jupyter + python -m pip install ipykernel + - name: list available kernels + run: | + python -m jupyter kernelspec list - name: Setup .NET uses: actions/setup-dotnet@v4 with: diff --git a/.github/workflows/dotnet-release.yml b/.github/workflows/dotnet-release.yml index aacfd115bb7..23f4258a0e0 100644 --- a/.github/workflows/dotnet-release.yml +++ b/.github/workflows/dotnet-release.yml @@ -29,6 +29,18 @@ jobs: - uses: actions/checkout@v4 with: lfs: true + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + - name: Install jupyter and ipykernel + run: | + python -m pip install --upgrade pip + python -m pip install jupyter + python -m pip install ipykernel + - name: list available kernels + run: | + python -m jupyter kernelspec list - name: Setup .NET uses: actions/setup-dotnet@v4 with: diff --git a/CITATION.cff b/CITATION.cff index bc9a03f375a..5e4c468067f 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,7 +5,7 @@ preferred-citation: given-names: "Qingyun" affiliation: "Penn State University, University Park PA USA" - family-names: "Bansal" - given-names: "Gargan" + given-names: "Gagan" affiliation: "Microsoft Research, Redmond WA USA" - family-names: "Zhang" given-names: "Jieyu" @@ -43,6 +43,7 @@ preferred-citation: - family-names: "Wang" given-names: "Chi" affiliation: "Microsoft Research, Redmond WA USA" - booktitle: "ArXiv preprint arXiv:2308.08155" + booktitle: "COLM" title: "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework" - year: 2023 + year: 2024 + url: "https://aka.ms/autogen-pdf" diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 00000000000..4726588453b --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,43 @@ +# Contributors + +## Special thanks to all the people who help this project: +> These individuals dedicate their time and expertise to improve this project. We are deeply grateful for their contributions. 
+
+| Name | GitHub Handle | Organization | Features | Roadmap Lead | Additional Information |
+|---|---|---|---|---|---|
+| Qingyun Wu | [qingyun-wu](https://github.com/qingyun-wu) | Penn State University | all, alt-models, autobuilder | Yes | Available most of the time (US Eastern Time) |
+| Chi Wang | [sonichi](https://github.com/sonichi) | - | all | Yes | |
+| Li Jiang | [thinkall](https://github.com/thinkall) | Microsoft | rag, autobuilder, group chat | Yes | [Issue #1657](https://github.com/microsoft/autogen/issues/1657) - Beijing, GMT+8 |
+| Mark Sze | [marklysze](https://github.com/marklysze) | - | alt-models, group chat | No | Generally available (Sydney, AU time) - Group Chat "auto" speaker selection |
+| Hrushikesh Dokala | [Hk669](https://github.com/Hk669) | - | alt-models, swebench, logging, rag | No | [Issue #2946](https://github.com/microsoft/autogen/issues/2946), [Pull Request #2933](https://github.com/microsoft/autogen/pull/2933) - Available most of the time (India, GMT+5:30) |
+| Jiale Liu | [LeoLjl](https://github.com/LeoLjl) | Penn State University | autobuild, group chat | No | |
+| Shaokun Zhang | [skzhang1](https://github.com/skzhang1) | Penn State University | AgentOptimizer, Teachability | Yes | [Issue #521](https://github.com/microsoft/autogen/issues/521) |
+| Rajan Chari | [rajan-chari](https://github.com/rajan-chari) | Microsoft Research | CAP, Survey of other frameworks | No | |
+| Victor Dibia | [victordibia](https://github.com/victordibia) | Microsoft Research | autogenstudio | Yes | [Issue #737](https://github.com/microsoft/autogen/issues/737) |
+| Yixuan Zhai | [randombet](https://github.com/randombet) | Meta | group chat, sequential_chats, rag | No | |
+| Xiaoyun Zhang | [LittleLittleCloud](https://github.com/LittleLittleCloud) | Microsoft | AutoGen.Net, group chat | Yes | [Backlog - AutoGen.Net](https://github.com/microsoft/autogen/issues) - Available most of the time (PST) |
+| Yiran Wu | [yiranwu0](https://github.com/yiranwu0) | Penn State University | alt-models, group chat, logging | Yes | |
+| Beibin Li | [BeibinLi](https://github.com/BeibinLi) | Microsoft Research | alt-models | Yes | |
+| Gagan Bansal | [gagb](https://github.com/gagb) | Microsoft Research | All | | |
+| Adam Fourney | [afourney](https://github.com/afourney) | Microsoft Research | Complex Tasks | | |
+| Ricky Loynd | [rickyloynd-microsoft](https://github.com/rickyloynd-microsoft) | Microsoft Research | Teachability | | |
+| Eric Zhu | [ekzhu](https://github.com/ekzhu) | Microsoft Research | All, Infra | | |
+| Jack Gerrits | [jackgerrits](https://github.com/jackgerrits) | Microsoft Research | All, Infra | | |
+| David Luong | [DavidLuong98](https://github.com/DavidLuong98) | Microsoft | AutoGen.Net | | |
+| Davor Runje | [davorrunje](https://github.com/davorrunje) | airt.ai | Tool calling, IO | | Available most of the time (Central European Time) |
+| Friederike Niedtner | [Friderike](https://www.microsoft.com/en-us/research/people/fniedtner/) | Microsoft Research | PM | | |
+| Rafah Hosn | [Rafah](https://www.microsoft.com/en-us/research/people/raaboulh/) | Microsoft Research | PM | | |
+| Robin Moeur | [Robin](https://www.linkedin.com/in/rmoeur/) | Microsoft Research | PM | | |
+| Jingya Chen | [jingyachen](https://github.com/JingyaChen) | Microsoft | UX Design, AutoGen Studio | | |
+| Suff Syed | [suffsyed](https://github.com/suffsyed) | Microsoft | UX Design, AutoGen Studio | | |
+
+## I would like to join this list. How can I help the project?
+> We're always looking for new contributors to join our team and help improve the project. For more information, please refer to our [CONTRIBUTING](https://microsoft.github.io/autogen/docs/contributor-guide/contributing) guide. + + +## Are you missing from this list? +> Please open a PR to help us fix this. + + +## Acknowledgements +This template was adapted from [GitHub Template Guide](https://github.com/cezaraugusto/github-template-guidelines/blob/master/.github/CONTRIBUTORS.md) by [cezaraugusto](https://github.com/cezaraugusto). diff --git a/README.md b/README.md index 7c7ac4b85c5..1a37ebe3e5f 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,35 @@ + +
+ +AutoGen Logo + + [![PyPI version](https://badge.fury.io/py/pyautogen.svg)](https://badge.fury.io/py/pyautogen) [![Build](https://github.com/microsoft/autogen/actions/workflows/python-package.yml/badge.svg)](https://github.com/microsoft/autogen/actions/workflows/python-package.yml) ![Python Version](https://img.shields.io/badge/3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue) [![Downloads](https://static.pepy.tech/badge/pyautogen/week)](https://pepy.tech/project/pyautogen) + +[![NuGet version](https://badge.fury.io/nu/AutoGen.Core.svg)](https://badge.fury.io/nu/AutoGen.Core) + + [![Discord](https://img.shields.io/discord/1153072414184452236?logo=discord&style=flat)](https://aka.ms/autogen-dc) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40pyautogen)](https://twitter.com/pyautogen) -[![NuGet version](https://badge.fury.io/nu/AutoGen.Core.svg)](https://badge.fury.io/nu/AutoGen.Core) +
# AutoGen + [📚 Cite paper](#related-papers). +:fire: June 6, 2024: WIRED publishes a new article on AutoGen: [Chatbot Teamwork Makes the AI Dream Work](https://www.wired.com/story/chatbot-teamwork-makes-the-ai-dream-work/) based on interview with [Adam Fourney](https://github.com/afourney). + +:fire: June 4th, 2024: Microsoft Research Forum publishes new update and video on [AutoGen and Complex Tasks](https://www.microsoft.com/en-us/research/video/autogen-update-complex-tasks-and-agents/) presented by [Adam Fourney](https://github.com/afourney). + :fire: May 29, 2024: DeepLearning.ai launched a new short course [AI Agentic Design Patterns with AutoGen](https://www.deeplearning.ai/short-courses/ai-agentic-design-patterns-with-autogen), made in collaboration with Microsoft and Penn State University, and taught by AutoGen creators [Chi Wang](https://github.com/sonichi) and [Qingyun Wu](https://github.com/qingyun-wu). :fire: May 24, 2024: Foundation Capital published an article on [Forbes: The Promise of Multi-Agent AI](https://www.forbes.com/sites/joannechen/2024/05/24/the-promise-of-multi-agent-ai/?sh=2c1e4f454d97) and a video [AI in the Real World Episode 2: Exploring Multi-Agent AI and AutoGen with Chi Wang](https://www.youtube.com/watch?v=RLwyXRVvlNk). @@ -68,10 +83,7 @@ AutoGen is an open-source programming framework for building AI agents and facilitating cooperation among multiple agents to solve tasks. AutoGen aims to streamline the development and research of agentic AI, much like PyTorch does for Deep Learning. It offers features such as agents capable of interacting with each other, facilitates the use of various large language models (LLMs) and tool use support, autonomous and human-in-the-loop workflows, and multi-agent conversation patterns. -**Open Source Statement**: The project welcomes contributions from developers and organizations worldwide. Our goal is to foster a collaborative and inclusive community where diverse perspectives and expertise can drive innovation and enhance the project's capabilities. Whether you are an individual contributor or represent an organization, we invite you to join us in shaping the future of this project. Together, we can build something truly remarkable. - -The project is currently maintained by a [dynamic group of volunteers](https://butternut-swordtail-8a5.notion.site/410675be605442d3ada9a42eb4dfef30?v=fa5d0a79fd3d4c0f9c112951b2831cbb&pvs=4) from several different organizations. Contact project administrators Chi Wang and Qingyun Wu via auto-gen@outlook.com if you are interested in becoming a maintainer. - +We welcome contributions from developers and organizations worldwide. Our goal is to foster a collaborative and inclusive community where diverse perspectives and expertise can drive innovation and enhance the project's capabilities. We acknowledge the invaluable contributions from our existing contributors, as listed in [contributors.md](./CONTRIBUTORS.md). Whether you are an individual contributor or represent an organization, we invite you to join us in shaping the future of this project. For further information please also see [Microsoft open-source contributing guidelines](https://github.com/microsoft/autogen?tab=readme-ov-file#contributing). 
![AutoGen Overview](https://github.com/microsoft/autogen/blob/main/website/static/img/autogen_agentchat.png) @@ -247,16 +259,25 @@ In addition, you can find: ## Related Papers -[AutoGen](https://arxiv.org/abs/2308.08155) +[AutoGen Studio](https://www.microsoft.com/en-us/research/publication/autogen-studio-a-no-code-developer-tool-for-building-and-debugging-multi-agent-systems/) + +``` +@inproceedings{dibia2024studio, + title={AutoGen Studio: A No-Code Developer Tool for Building and Debugging Multi-Agent Systems}, + author={Victor Dibia and Jingya Chen and Gagan Bansal and Suff Syed and Adam Fourney and Erkang (Eric) Zhu and Chi Wang and Saleema Amershi}, + year={2024}, + booktitle={Pre-Print} +} +``` + +[AutoGen](https://aka.ms/autogen-pdf) ``` @inproceedings{wu2023autogen, title={AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework}, author={Qingyun Wu and Gagan Bansal and Jieyu Zhang and Yiran Wu and Beibin Li and Erkang Zhu and Li Jiang and Xiaoyun Zhang and Shaokun Zhang and Jiale Liu and Ahmed Hassan Awadallah and Ryen W White and Doug Burger and Chi Wang}, - year={2023}, - eprint={2308.08155}, - archivePrefix={arXiv}, - primaryClass={cs.AI} + year={2024}, + booktitle={COLM}, } ``` @@ -354,7 +375,7 @@ may be either trademarks or registered trademarks of Microsoft in the United Sta The licenses for this project do not grant you rights to use any Microsoft names, logos, or trademarks. Microsoft's general trademark guidelines can be found at http://go.microsoft.com/fwlink/?LinkID=254653. -Privacy information can be found at https://privacy.microsoft.com/en-us/ +Privacy information can be found at https://go.microsoft.com/fwlink/?LinkId=521839 Microsoft and any contributors reserve all other rights, whether under their respective copyrights, patents, or trademarks, whether by implication, estoppel, or otherwise. diff --git a/TRANSPARENCY_FAQS.md b/TRANSPARENCY_FAQS.md index 206af084748..addf29d8b8d 100644 --- a/TRANSPARENCY_FAQS.md +++ b/TRANSPARENCY_FAQS.md @@ -31,6 +31,8 @@ While AutoGen automates LLM workflows, decisions about how to use specific LLM o - Current version of AutoGen was evaluated on six applications to illustrate its potential in simplifying the development of high-performance multi-agent applications. These applications are selected based on their real-world relevance, problem difficulty and problem solving capabilities enabled by AutoGen, and innovative potential. - These applications involve using AutoGen to solve math problems, question answering, decision making in text world environments, supply chain optimization, etc. For each of these domains AutoGen was evaluated on various success based metrics (i.e., how often the AutoGen based implementation solved the task). And, in some cases, AutoGen based approach was also evaluated on implementation efficiency (e.g., to track reductions in developer effort to build). More details can be found at: https://aka.ms/AutoGen/TechReport - The team has conducted tests where a “red” agent attempts to get the default AutoGen assistant to break from its alignment and guardrails. The team has observed that out of 70 attempts to break guardrails, only 1 was successful in producing text that would have been flagged as problematic by Azure OpenAI filters. 
The team has not observed any evidence that AutoGen (or GPT models as hosted by OpenAI or Azure) can produce novel code exploits or jailbreak prompts, since direct prompts to “be a hacker”, “write exploits”, or “produce a phishing email” are refused by existing filters. +- We also evaluated [a team of AutoGen agents](https://github.com/microsoft/autogen/tree/gaia_multiagent_v01_march_1st/samples/tools/autogenbench/scenarios/GAIA/Templates/Orchestrator) on the [GAIA benchmarks](https://arxiv.org/abs/2311.12983), and got [SOTA results](https://huggingface.co/spaces/gaia-benchmark/leaderboard) as of + March 1, 2024. ## What are the limitations of AutoGen? How can users minimize the impact of AutoGen’s limitations when using the system? AutoGen relies on existing LLMs. Experimenting with AutoGen would retain common limitations of large language models; including: diff --git a/autogen/agentchat/contrib/capabilities/context_handling.py b/autogen/agentchat/contrib/capabilities/context_handling.py deleted file mode 100644 index 44b10259f1b..00000000000 --- a/autogen/agentchat/contrib/capabilities/context_handling.py +++ /dev/null @@ -1,138 +0,0 @@ -import sys -from typing import Dict, List, Optional -from warnings import warn - -import tiktoken -from termcolor import colored - -from autogen import ConversableAgent, token_count_utils - -warn( - "Context handling with TransformChatHistory is deprecated and will be removed in `0.2.30`. " - "Please use `TransformMessages`, documentation can be found at https://microsoft.github.io/autogen/docs/topics/handling_long_contexts/intro_to_transform_messages", - DeprecationWarning, - stacklevel=2, -) - - -class TransformChatHistory: - """ - An agent's chat history with other agents is a common context that it uses to generate a reply. - This capability allows the agent to transform its chat history prior to using it to generate a reply. - It does not permanently modify the chat history, but rather processes it on every invocation. - - This capability class enables various strategies to transform chat history, such as: - - Truncate messages: Truncate each message to first maximum number of tokens. - - Limit number of messages: Truncate the chat history to a maximum number of (recent) messages. - - Limit number of tokens: Truncate the chat history to number of recent N messages that fit in - maximum number of tokens. - Note that the system message, because of its special significance, is always kept as is. - - The three strategies can be combined. For example, when each of these parameters are specified - they are used in the following order: - 1. First truncate messages to a maximum number of tokens - 2. Second, it limits the number of message to keep - 3. Third, it limits the total number of tokens in the chat history - - When adding this capability to an agent, the following are modified: - - A hook is added to the hookable method `process_all_messages_before_reply` to transform the - received messages for possible truncation. - Not modifying the stored message history. - """ - - def __init__( - self, - *, - max_tokens_per_message: Optional[int] = None, - max_messages: Optional[int] = None, - max_tokens: Optional[int] = None, - ): - """ - Args: - max_tokens_per_message (Optional[int]): Maximum number of tokens to keep in each message. - max_messages (Optional[int]): Maximum number of messages to keep in the context. - max_tokens (Optional[int]): Maximum number of tokens to keep in the context. 
- """ - self.max_tokens_per_message = max_tokens_per_message if max_tokens_per_message else sys.maxsize - self.max_messages = max_messages if max_messages else sys.maxsize - self.max_tokens = max_tokens if max_tokens else sys.maxsize - - def add_to_agent(self, agent: ConversableAgent): - """ - Adds TransformChatHistory capability to the given agent. - """ - agent.register_hook(hookable_method="process_all_messages_before_reply", hook=self._transform_messages) - - def _transform_messages(self, messages: List[Dict]) -> List[Dict]: - """ - Args: - messages: List of messages to process. - - Returns: - List of messages with the first system message and the last max_messages messages, - ensuring each message does not exceed max_tokens_per_message. - """ - temp_messages = messages.copy() - processed_messages = [] - system_message = None - processed_messages_tokens = 0 - - if messages[0]["role"] == "system": - system_message = messages[0].copy() - temp_messages.pop(0) - - total_tokens = sum( - token_count_utils.count_token(msg["content"]) for msg in temp_messages - ) # Calculate tokens for all messages - - # Truncate each message's content to a maximum token limit of each message - - # Process recent messages first - for msg in reversed(temp_messages[-self.max_messages :]): - msg["content"] = truncate_str_to_tokens(msg["content"], self.max_tokens_per_message) - msg_tokens = token_count_utils.count_token(msg["content"]) - if processed_messages_tokens + msg_tokens > self.max_tokens: - break - # append the message to the beginning of the list to preserve order - processed_messages = [msg] + processed_messages - processed_messages_tokens += msg_tokens - if system_message: - processed_messages.insert(0, system_message) - # Optionally, log the number of truncated messages and tokens if needed - num_truncated = len(messages) - len(processed_messages) - - if num_truncated > 0 or total_tokens > processed_messages_tokens: - print( - colored( - f"Truncated {num_truncated} messages. Reduced from {len(messages)} to {len(processed_messages)}.", - "yellow", - ) - ) - print( - colored( - f"Truncated {total_tokens - processed_messages_tokens} tokens. Tokens reduced from {total_tokens} to {processed_messages_tokens}", - "yellow", - ) - ) - return processed_messages - - -def truncate_str_to_tokens(text: str, max_tokens: int, model: str = "gpt-3.5-turbo-0613") -> str: - """Truncate a string so that the number of tokens is less than or equal to max_tokens using tiktoken. - - Args: - text: The string to truncate. - max_tokens: The maximum number of tokens to keep. - model: The target OpenAI model for tokenization alignment. - - Returns: - The truncated string. 
- """ - - encoding = tiktoken.encoding_for_model(model) # Get the appropriate tokenizer - - encoded_tokens = encoding.encode(text) - truncated_tokens = encoded_tokens[:max_tokens] - truncated_text = encoding.decode(truncated_tokens) # Decode back to text - - return truncated_text diff --git a/autogen/agentchat/contrib/capabilities/transform_messages.py b/autogen/agentchat/contrib/capabilities/transform_messages.py index e96dc39fa7b..1ce219bdadf 100644 --- a/autogen/agentchat/contrib/capabilities/transform_messages.py +++ b/autogen/agentchat/contrib/capabilities/transform_messages.py @@ -1,9 +1,8 @@ import copy from typing import Dict, List -from autogen import ConversableAgent - from ....formatting_utils import colored +from ...conversable_agent import ConversableAgent from .transforms import MessageTransform diff --git a/autogen/agentchat/contrib/capabilities/transforms.py b/autogen/agentchat/contrib/capabilities/transforms.py index dad3fc335ed..d9ad365b91b 100644 --- a/autogen/agentchat/contrib/capabilities/transforms.py +++ b/autogen/agentchat/contrib/capabilities/transforms.py @@ -53,13 +53,16 @@ class MessageHistoryLimiter: It trims the conversation history by removing older messages, retaining only the most recent messages. """ - def __init__(self, max_messages: Optional[int] = None): + def __init__(self, max_messages: Optional[int] = None, keep_first_message: bool = False): """ Args: max_messages Optional[int]: Maximum number of messages to keep in the context. Must be greater than 0 if not None. + keep_first_message bool: Whether to keep the original first message in the conversation history. + Defaults to False. """ self._validate_max_messages(max_messages) self._max_messages = max_messages + self._keep_first_message = keep_first_message def apply_transform(self, messages: List[Dict]) -> List[Dict]: """Truncates the conversation history to the specified maximum number of messages. @@ -75,10 +78,31 @@ def apply_transform(self, messages: List[Dict]) -> List[Dict]: List[Dict]: A new list containing the most recent messages up to the specified maximum. """ - if self._max_messages is None: + if self._max_messages is None or len(messages) <= self._max_messages: return messages - return messages[-self._max_messages :] + truncated_messages = [] + remaining_count = self._max_messages + + # Start with the first message if we need to keep it + if self._keep_first_message: + truncated_messages = [messages[0]] + remaining_count -= 1 + + # Loop through messages in reverse + for i in range(len(messages) - 1, 0, -1): + if remaining_count > 1: + truncated_messages.insert(1 if self._keep_first_message else 0, messages[i]) + if remaining_count == 1: + # If there's only 1 slot left and it's a 'tools' message, ignore it. 
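+                # Rationale: a "tool" result cannot be the oldest retained message, because a tool
+                # response without the assistant tool call that precedes it is typically rejected
+                # by chat-completion APIs; in that case the message is dropped rather than kept.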
+ if messages[i].get("role") != "tool": + truncated_messages.insert(1, messages[i]) + + remaining_count -= 1 + if remaining_count == 0: + break + + return truncated_messages def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: pre_transform_messages_len = len(pre_transform_messages) @@ -421,3 +445,95 @@ def _compress_text(self, text: str) -> Tuple[str, int]: def _validate_min_tokens(self, min_tokens: Optional[int]): if min_tokens is not None and min_tokens <= 0: raise ValueError("min_tokens must be greater than 0 or None") + + +class TextMessageContentName: + """A transform for including the agent's name in the content of a message.""" + + def __init__( + self, + position: str = "start", + format_string: str = "{name}:\n", + deduplicate: bool = True, + filter_dict: Optional[Dict] = None, + exclude_filter: bool = True, + ): + """ + Args: + position (str): The position to add the name to the content. The possible options are 'start' or 'end'. Defaults to 'start'. + format_string (str): The f-string to format the message name with. Use '{name}' as a placeholder for the agent's name. Defaults to '{name}:\n' and must contain '{name}'. + deduplicate (bool): Whether to deduplicate the formatted string so it doesn't appear twice (sometimes the LLM will add it to new messages itself). Defaults to True. + filter_dict (None or dict): A dictionary to filter out messages that you want/don't want to compress. + If None, no filters will be applied. + exclude_filter (bool): If exclude filter is True (the default value), messages that match the filter will be + excluded from compression. If False, messages that match the filter will be compressed. + """ + + assert isinstance(position, str) and position is not None + assert position in ["start", "end"] + assert isinstance(format_string, str) and format_string is not None + assert "{name}" in format_string + assert isinstance(deduplicate, bool) and deduplicate is not None + + self._position = position + self._format_string = format_string + self._deduplicate = deduplicate + self._filter_dict = filter_dict + self._exclude_filter = exclude_filter + + # Track the number of messages changed for logging + self._messages_changed = 0 + + def apply_transform(self, messages: List[Dict]) -> List[Dict]: + """Applies the name change to the message based on the position and format string. + + Args: + messages (List[Dict]): A list of message dictionaries. + + Returns: + List[Dict]: A list of dictionaries with the message content updated with names. + """ + # Make sure there is at least one message + if not messages: + return messages + + messages_changed = 0 + processed_messages = copy.deepcopy(messages) + for message in processed_messages: + # Some messages may not have content. 
+ if not transforms_util.is_content_right_type( + message.get("content") + ) or not transforms_util.is_content_right_type(message.get("name")): + continue + + if not transforms_util.should_transform_message(message, self._filter_dict, self._exclude_filter): + continue + + if transforms_util.is_content_text_empty(message["content"]) or transforms_util.is_content_text_empty( + message["name"] + ): + continue + + # Get and format the name in the content + content = message["content"] + formatted_name = self._format_string.format(name=message["name"]) + + if self._position == "start": + if not self._deduplicate or not content.startswith(formatted_name): + message["content"] = f"{formatted_name}{content}" + + messages_changed += 1 + else: + if not self._deduplicate or not content.endswith(formatted_name): + message["content"] = f"{content}{formatted_name}" + + messages_changed += 1 + + self._messages_changed = messages_changed + return processed_messages + + def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: + if self._messages_changed > 0: + return f"{self._messages_changed} message(s) changed to incorporate name.", True + else: + return "No messages changed to incorporate name.", False diff --git a/autogen/agentchat/contrib/compressible_agent.py b/autogen/agentchat/contrib/compressible_agent.py deleted file mode 100644 index bea4058b94a..00000000000 --- a/autogen/agentchat/contrib/compressible_agent.py +++ /dev/null @@ -1,436 +0,0 @@ -import copy -import inspect -import logging -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union -from warnings import warn - -from autogen import Agent, ConversableAgent, OpenAIWrapper -from autogen.token_count_utils import count_token, get_max_token_limit, num_tokens_from_functions - -from ...formatting_utils import colored - -logger = logging.getLogger(__name__) - -warn( - "Context handling with CompressibleAgent is deprecated and will be removed in `0.2.30`. " - "Please use `TransformMessages`, documentation can be found at https://microsoft.github.io/autogen/docs/topics/handling_long_contexts/intro_to_transform_messages", - DeprecationWarning, - stacklevel=2, -) - - -class CompressibleAgent(ConversableAgent): - """CompressibleAgent agent. While this agent retains all the default functionalities of the `AssistantAgent`, - it also provides the added feature of compression when activated through the `compress_config` setting. - - `compress_config` is set to False by default, making this agent equivalent to the `AssistantAgent`. - This agent does not work well in a GroupChat: The compressed messages will not be sent to all the agents in the group. - The default system message is the same as AssistantAgent. - `human_input_mode` is default to "NEVER" - and `code_execution_config` is default to False. - This agent doesn't execute code or function call by default. - """ - - DEFAULT_SYSTEM_MESSAGE = """You are a helpful AI assistant. -Solve tasks using your coding and language skills. -In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. - 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. 
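For readers of the `transforms.py` additions above, here is a minimal, hypothetical usage sketch (not part of this diff) showing how the new `keep_first_message` option and the new `TextMessageContentName` transform can be attached to an agent through the existing `TransformMessages` capability, which is also the replacement that the `TransformChatHistory` and `CompressibleAgent` deprecation notices point to. Agent names and limits are placeholders.

```python
# Hypothetical sketch: wiring the updated transforms into an agent.
from autogen import ConversableAgent
from autogen.agentchat.contrib.capabilities.transform_messages import TransformMessages
from autogen.agentchat.contrib.capabilities.transforms import (
    MessageHistoryLimiter,
    TextMessageContentName,
)

assistant = ConversableAgent(name="assistant", llm_config=False)

context_handling = TransformMessages(
    transforms=[
        # Keep the opening task description plus the 9 most recent messages.
        MessageHistoryLimiter(max_messages=10, keep_first_message=True),
        # Prefix each message with the sending agent's name, e.g. "planner:\n...".
        TextMessageContentName(position="start", format_string="{name}:\n"),
    ]
)
context_handling.add_to_agent(assistant)
```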
After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. - 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. -Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. -When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. -If you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user. -If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. -When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. -Reply "TERMINATE" in the end when everything is done. - """ - DEFAULT_COMPRESS_CONFIG = { - "mode": "TERMINATE", - "compress_function": None, - "trigger_count": 0.7, - "async": False, - "broadcast": True, - "verbose": False, - "leave_last_n": 2, - } - - def __init__( - self, - name: str, - system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE, - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER", - function_map: Optional[Dict[str, Callable]] = None, - code_execution_config: Optional[Union[Dict, bool]] = False, - llm_config: Optional[Union[Dict, bool]] = None, - default_auto_reply: Optional[Union[str, Dict, None]] = "", - compress_config: Optional[Dict] = False, - description: Optional[str] = None, - **kwargs, - ): - """ - Args: - name (str): agent name. - system_message (str): system message for the ChatCompletion inference. - Please override this attribute if you want to reprogram the agent. - llm_config (dict): llm inference configuration. - Note: you must set `model` in llm_config. It will be used to compute the token count. - Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) - for available options. - is_termination_msg (function): a function that takes a message in the form of a dictionary - and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - max_consecutive_auto_reply (int): the maximum number of consecutive auto replies. - default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case). - The limit only plays a role when human_input_mode is not "ALWAYS". - compress_config (dict or True/False): config for compression before oai_reply. Default to False. 
- You should contain the following keys: - - "mode" (Optional, str, default to "TERMINATE"): Choose from ["COMPRESS", "TERMINATE", "CUSTOMIZED"]. - 1. `TERMINATE`: terminate the conversation ONLY when token count exceeds the max limit of current model. `trigger_count` is NOT used in this mode. - 2. `COMPRESS`: compress the messages when the token count exceeds the limit. - 3. `CUSTOMIZED`: pass in a customized function to compress the messages. - - "compress_function" (Optional, callable, default to None): Must be provided when mode is "CUSTOMIZED". - The function should takes a list of messages and returns a tuple of (is_compress_success: bool, compressed_messages: List[Dict]). - - "trigger_count" (Optional, float, int, default to 0.7): the threshold to trigger compression. - If a float between (0, 1], it is the percentage of token used. if a int, it is the number of tokens used. - - "async" (Optional, bool, default to False): whether to compress asynchronously. - - "broadcast" (Optional, bool, default to True): whether to update the compressed message history to sender. - - "verbose" (Optional, bool, default to False): Whether to print the content before and after compression. Used when mode="COMPRESS". - - "leave_last_n" (Optional, int, default to 0): If provided, the last n messages will not be compressed. Used when mode="COMPRESS". - description (str): a short description of the agent. This description is used by other agents - (e.g. the GroupChatManager) to decide when to call upon this agent. (Default: system_message) - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../conversable_agent#__init__). - """ - super().__init__( - name=name, - system_message=system_message, - is_termination_msg=is_termination_msg, - max_consecutive_auto_reply=max_consecutive_auto_reply, - human_input_mode=human_input_mode, - function_map=function_map, - code_execution_config=code_execution_config, - llm_config=llm_config, - default_auto_reply=default_auto_reply, - description=description, - **kwargs, - ) - - self._set_compress_config(compress_config) - - # create a separate client for compression. - if llm_config is False: - self.llm_compress_config = False - self.compress_client = None - else: - if "model" not in llm_config: - raise ValueError("llm_config must contain the 'model' field.") - self.llm_compress_config = self.llm_config.copy() - # remove functions - if "functions" in self.llm_compress_config: - del self.llm_compress_config["functions"] - self.compress_client = OpenAIWrapper(**self.llm_compress_config) - - self._reply_func_list.clear() - self.register_reply([Agent, None], ConversableAgent.generate_oai_reply) - self.register_reply([Agent], CompressibleAgent.on_oai_token_limit) # check token limit - self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply) - self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply) - self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply) - - def _set_compress_config(self, compress_config: Optional[Dict] = False): - if compress_config: - if compress_config is True: - compress_config = {} - if not isinstance(compress_config, dict): - raise ValueError("compress_config must be a dict or True/False.") - - allowed_modes = ["COMPRESS", "TERMINATE", "CUSTOMIZED"] - if compress_config.get("mode", "TERMINATE") not in allowed_modes: - raise ValueError(f"Invalid compression mode. 
Allowed values are: {', '.join(allowed_modes)}") - - self.compress_config = self.DEFAULT_COMPRESS_CONFIG.copy() - self.compress_config.update(compress_config) - - if not isinstance(self.compress_config["leave_last_n"], int) or self.compress_config["leave_last_n"] < 0: - raise ValueError("leave_last_n must be a non-negative integer.") - - # convert trigger_count to int, default to 0.7 - trigger_count = self.compress_config["trigger_count"] - if not (isinstance(trigger_count, int) or isinstance(trigger_count, float)) or trigger_count <= 0: - raise ValueError("trigger_count must be a positive number.") - if isinstance(trigger_count, float) and 0 < trigger_count <= 1: - self.compress_config["trigger_count"] = int( - trigger_count * get_max_token_limit(self.llm_config["model"]) - ) - trigger_count = self.compress_config["trigger_count"] - init_count = self._compute_init_token_count() - if trigger_count < init_count: - print( - f"Warning: trigger_count {trigger_count} is less than the initial token count {init_count} (system message + function description if passed), compression will be disabled. Please increase trigger_count if you want to enable compression." - ) - self.compress_config = False - - if self.compress_config["mode"] == "CUSTOMIZED" and self.compress_config["compress_function"] is None: - raise ValueError("compress_function must be provided when mode is CUSTOMIZED.") - if self.compress_config["mode"] != "CUSTOMIZED" and self.compress_config["compress_function"] is not None: - print("Warning: compress_function is provided but mode is not 'CUSTOMIZED'.") - - else: - self.compress_config = False - - def generate_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - exclude: Optional[List[Callable]] = None, - ) -> Union[str, Dict, None]: - """ - - Adding to line 202: - ``` - if messages is not None and messages != self._oai_messages[sender]: - messages = self._oai_messages[sender] - ``` - """ - if all((messages is None, sender is None)): - error_msg = f"Either {messages=} or {sender=} must be provided." - logger.error(error_msg) - raise AssertionError(error_msg) - - if messages is None: - messages = self._oai_messages[sender] - - for reply_func_tuple in self._reply_func_list: - reply_func = reply_func_tuple["reply_func"] - if exclude and reply_func in exclude: - continue - if inspect.iscoroutinefunction(reply_func): - continue - if self._match_trigger(reply_func_tuple["trigger"], sender): - final, reply = reply_func(self, messages=messages, sender=sender, config=reply_func_tuple["config"]) - if messages is not None and sender is not None and messages != self._oai_messages[sender]: - messages = self._oai_messages[sender] - if final: - return reply - return self._default_auto_reply - - def _compute_init_token_count(self): - """Check if the agent is LLM-based and compute the initial token count.""" - if self.llm_config is False: - return 0 - - func_count = 0 - if "functions" in self.llm_config: - func_count = num_tokens_from_functions(self.llm_config["functions"], self.llm_config["model"]) - - return func_count + count_token(self._oai_system_message, self.llm_config["model"]) - - def _manage_history_on_token_limit(self, messages, token_used, max_token_allowed, model): - """Manage the message history with different modes when token limit is reached. - Return: - final (bool): whether to terminate the agent. - compressed_messages (List[Dict]): the compressed messages. None if no compression or compression failed. - """ - # 1. 
mode = "TERMINATE", terminate the agent if no token left. - if self.compress_config["mode"] == "TERMINATE": - if max_token_allowed - token_used <= 0: - # Terminate if no token left. - print( - colored( - f'Warning: Terminate Agent "{self.name}" due to no token left for oai reply. max token for {model}: {max_token_allowed}, existing token count: {token_used}', - "yellow", - ), - flush=True, - ) - return True, None - return False, None - - # if token_used is less than trigger_count, no compression will be used. - if token_used < self.compress_config["trigger_count"]: - return False, None - - # 2. mode = "COMPRESS" or mode = "CUSTOMIZED", compress the messages - copied_messages = copy.deepcopy(messages) - if self.compress_config["mode"] == "COMPRESS": - _, compress_messages = self.compress_messages(copied_messages) - elif self.compress_config["mode"] == "CUSTOMIZED": - _, compress_messages = self.compress_config["compress_function"](copied_messages) - else: - raise ValueError(f"Unknown compression mode: {self.compress_config['mode']}") - - if compress_messages is not None: - for i in range(len(compress_messages)): - compress_messages[i] = self._get_valid_oai_message(compress_messages[i]) - return False, compress_messages - - def _get_valid_oai_message(self, message): - """Convert a message into a valid OpenAI ChatCompletion message.""" - oai_message = {k: message[k] for k in ("content", "function_call", "name", "context", "role") if k in message} - if "content" not in oai_message: - if "function_call" in oai_message: - oai_message["content"] = None # if only function_call is provided, content will be set to None. - else: - raise ValueError( - "Message can't be converted into a valid ChatCompletion message. Either content or function_call must be provided." - ) - if "function_call" in oai_message: - oai_message["role"] = "assistant" # only messages with role 'assistant' can have a function call. - oai_message["function_call"] = dict(oai_message["function_call"]) - return oai_message - - def _print_compress_info(self, init_token_count, token_used, token_after_compression): - to_print = "Token Count (including {} tokens from system msg and function descriptions). Before compression : {} | After: {}".format( - init_token_count, - token_used, - token_after_compression, - ) - print(colored(to_print, "magenta"), flush=True) - print("-" * 80, flush=True) - - def on_oai_token_limit( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """(Experimental) Compress previous messages when a threshold of tokens is reached. 
- - TODO: async compress - TODO: maintain a list for old oai messages (messages before compression) - """ - llm_config = self.llm_config if config is None else config - if self.compress_config is False: - return False, None - if messages is None: - messages = self._oai_messages[sender] - - model = llm_config["model"] - init_token_count = self._compute_init_token_count() - token_used = init_token_count + count_token(messages, model) - final, compressed_messages = self._manage_history_on_token_limit( - messages, token_used, get_max_token_limit(model), model - ) - - # update message history with compressed messages - if compressed_messages is not None: - self._print_compress_info( - init_token_count, token_used, count_token(compressed_messages, model) + init_token_count - ) - self._oai_messages[sender] = compressed_messages - if self.compress_config["broadcast"]: - # update the compressed message history to sender - sender._oai_messages[self] = copy.deepcopy(compressed_messages) - # switching the role of the messages for the sender - for i in range(len(sender._oai_messages[self])): - cmsg = sender._oai_messages[self][i] - if "function_call" in cmsg or cmsg["role"] == "user": - cmsg["role"] = "assistant" - elif cmsg["role"] == "assistant": - cmsg["role"] = "user" - sender._oai_messages[self][i] = cmsg - - # successfully compressed, return False, None for generate_oai_reply to be called with the updated messages - return False, None - return final, None - - def compress_messages( - self, - messages: Optional[List[Dict]] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None, List]]: - """Compress a list of messages into one message. - - The first message (the initial prompt) will not be compressed. - The rest of the messages will be compressed into one message, the model is asked to distinguish the role of each message: USER, ASSISTANT, FUNCTION_CALL, FUNCTION_RETURN. - Check out the compress_sys_msg. - - TODO: model used in compression agent is different from assistant agent: For example, if original model used by is gpt-4; we start compressing at 70% of usage, 70% of 8092 = 5664; and we use gpt 3.5 here max_toke = 4096, it will raise error. choosinng model automatically? - """ - # 1. use the compression client - client = self.compress_client if config is None else config - - # 2. stop if there is only one message in the list - leave_last_n = self.compress_config.get("leave_last_n", 0) - if leave_last_n + 1 >= len(messages): - logger.warning( - f"Warning: Compression skipped at trigger count threshold. The first msg and last {leave_last_n} msgs will not be compressed. current msg count: {len(messages)}. Consider raising trigger_count." - ) - return False, None - - # 3. 
put all history into one, except the first one - if self.compress_config["verbose"]: - print(colored("*" * 30 + "Start compressing the following content:" + "*" * 30, "magenta"), flush=True) - - compressed_prompt = "Below is the compressed content from the previous conversation, evaluate the process and continue if necessary:\n" - chat_to_compress = "To be compressed:\n" - - for m in messages[1 : len(messages) - leave_last_n]: # 0, 1, 2, 3, 4 - # Handle function role - if m.get("role") == "function": - chat_to_compress += f"##FUNCTION_RETURN## (from function \"{m['name']}\"): \n{m['content']}\n" - - # If name exists in the message - elif "name" in m: - chat_to_compress += f"##{m['name']}({m['role'].upper()})## {m['content']}\n" - - # Handle case where content is not None and name is absent - elif m.get("content"): # This condition will also handle None and empty string - if compressed_prompt in m["content"]: - chat_to_compress += m["content"].replace(compressed_prompt, "") + "\n" - else: - chat_to_compress += f"##{m['role'].upper()}## {m['content']}\n" - - # Handle function_call in the message - if "function_call" in m: - function_name = m["function_call"].get("name") - function_args = m["function_call"].get("arguments") - - if not function_name or not function_args: - chat_to_compress += f"##FUNCTION_CALL## {m['function_call']}\n" - else: - chat_to_compress += f"##FUNCTION_CALL## \nName: {function_name}\nArgs: {function_args}\n" - - chat_to_compress = [{"role": "user", "content": chat_to_compress}] - - if self.compress_config["verbose"]: - print(chat_to_compress[0]["content"]) - - # 4. use LLM to compress - compress_sys_msg = """You are a helpful assistant that will summarize and compress conversation history. -Rules: -1. Please summarize each of the message and reserve the exact titles: ##USER##, ##ASSISTANT##, ##FUNCTION_CALL##, ##FUNCTION_RETURN##, ##SYSTEM##, ##()## (e.g. ##Bob(ASSISTANT)##). -2. Try to compress the content but reserve important information (a link, a specific number, etc.). -3. Use words to summarize the code blocks or functions calls (##FUNCTION_CALL##) and their goals. For code blocks, please use ##CODE## to mark it. -4. For returns from functions (##FUNCTION_RETURN##) or returns from code execution: summarize the content and indicate the status of the return (e.g. success, error, etc.). -""" - try: - response = client.create( - context=None, - messages=[{"role": "system", "content": compress_sys_msg}] + chat_to_compress, - ) - except Exception as e: - print(colored(f"Failed to compress the content due to {e}", "red"), flush=True) - return False, None - - compressed_message = self.client.extract_text_or_completion_object(response)[0] - assert isinstance(compressed_message, str), f"compressed_message should be a string: {compressed_message}" - if self.compress_config["verbose"]: - print( - colored("*" * 30 + "Content after compressing:" + "*" * 30, "magenta"), - flush=True, - ) - print(compressed_message, colored("\n" + "*" * 80, "magenta")) - - # 5. 
add compressed message to the first message and return - return ( - True, - [ - messages[0], - { - "content": compressed_prompt + compressed_message, - "role": "system", - }, - ] - + messages[len(messages) - leave_last_n :], - ) diff --git a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py index ea81de6dff1..f1cc6947d50 100644 --- a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +++ b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py @@ -1,3 +1,4 @@ +import warnings from typing import Callable, Dict, List, Literal, Optional from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent @@ -93,6 +94,11 @@ def __init__( **kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__). """ + warnings.warn( + "The QdrantRetrieveUserProxyAgent is deprecated. Please use the RetrieveUserProxyAgent instead, set `vector_db` to `qdrant`.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(name, human_input_mode, is_termination_msg, retrieve_config, **kwargs) self._client = self._retrieve_config.get("client", QdrantClient(":memory:")) self._embedding_model = self._retrieve_config.get("embedding_model", "BAAI/bge-small-en-v1.5") diff --git a/autogen/agentchat/contrib/retrieve_assistant_agent.py b/autogen/agentchat/contrib/retrieve_assistant_agent.py index 9b5ace200dc..173bc4432e7 100644 --- a/autogen/agentchat/contrib/retrieve_assistant_agent.py +++ b/autogen/agentchat/contrib/retrieve_assistant_agent.py @@ -1,3 +1,4 @@ +import warnings from typing import Any, Dict, List, Optional, Tuple, Union from autogen.agentchat.agent import Agent @@ -16,6 +17,11 @@ class RetrieveAssistantAgent(AssistantAgent): """ def __init__(self, *args, **kwargs): + warnings.warn( + "The RetrieveAssistantAgent is deprecated. Please use the AssistantAgent instead.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(*args, **kwargs) self.register_reply(Agent, RetrieveAssistantAgent._generate_retrieve_assistant_reply) diff --git a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py index 90757af6fc3..10b70e0e972 100644 --- a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py +++ b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py @@ -189,7 +189,7 @@ def __init__( interactive retrieval. Default is True. - `collection_name` (Optional, str) - the name of the collection. If key not provided, a default name `autogen-docs` will be used. - - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is True. + - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is False. - `overwrite` (Optional, bool) - Whether to overwrite the collection if it exists. Default is False. Case 1. if the collection does not exist, create the collection. Case 2. the collection exists, if overwrite is True, it will overwrite the collection. 
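As a hedged illustration of the docstring hunk above (the documented `get_or_create` default is now `False`) and of the new deprecation warnings that point from `QdrantRetrieveUserProxyAgent` to `RetrieveUserProxyAgent` with `vector_db="qdrant"`, here is a minimal migration sketch; the collection name and docs path are placeholders.

```python
# Hypothetical migration sketch: QdrantRetrieveUserProxyAgent -> RetrieveUserProxyAgent.
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent

ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    retrieve_config={
        "task": "qa",
        "docs_path": "./docs",            # placeholder path
        "vector_db": "qdrant",            # replaces the deprecated Qdrant-specific agent
        "collection_name": "autogen-docs",
        "get_or_create": True,            # default is documented as False, so opt in explicitly
    },
)
```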
@@ -306,6 +306,10 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = self._db_config["embedding_function"] = self._embedding_function self._vector_db = VectorDBFactory.create_vector_db(db_type=self._vector_db, **self._db_config) self.register_reply(Agent, RetrieveUserProxyAgent._generate_retrieve_user_reply, position=2) + self.register_hook( + hookable_method="process_message_before_send", + hook=self._check_update_context_before_send, + ) def _init_db(self): if not self._vector_db: @@ -400,6 +404,34 @@ def _is_termination_msg_retrievechat(self, message): update_context_case1, update_context_case2 = self._check_update_context(message) return not (contain_code or update_context_case1 or update_context_case2) + def _check_update_context_before_send(self, sender, message, recipient, silent): + if not isinstance(message, (str, dict)): + return message + elif isinstance(message, dict): + msg_text = message.get("content", message) + else: + msg_text = message + + if "UPDATE CONTEXT" == msg_text.strip().upper(): + doc_contents = self._get_context(self._results) + + # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the + # next similar docs in the retrieved doc results. + if not doc_contents: + for _tmp_retrieve_count in range(1, 5): + self._reset(intermediate=True) + self.retrieve_docs( + self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string + ) + doc_contents = self._get_context(self._results) + if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]): + break + msg_text = self._generate_message(doc_contents, task=self._task) + + if isinstance(message, dict): + message["content"] = msg_text + return message + @staticmethod def get_max_tokens(model="gpt-3.5-turbo"): if "32k" in model: diff --git a/autogen/agentchat/contrib/vectordb/qdrant.py b/autogen/agentchat/contrib/vectordb/qdrant.py index 398734eb033..d9c4ee1d2e5 100644 --- a/autogen/agentchat/contrib/vectordb/qdrant.py +++ b/autogen/agentchat/contrib/vectordb/qdrant.py @@ -93,7 +93,7 @@ def __init__( kwargs: dict | Additional keyword arguments. """ self.client: QdrantClient = client or QdrantClient(location=":memory:") - self.embedding_function = FastEmbedEmbeddingFunction() or embedding_function + self.embedding_function = embedding_function or FastEmbedEmbeddingFunction() self.collection_options = collection_options self.content_payload_key = content_payload_key self.metadata_payload_key = metadata_payload_key diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index a088c491082..eabe6d6d460 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -377,9 +377,9 @@ def replace_reply_func(self, old_reply_func: Callable, new_reply_func: Callable) f["reply_func"] = new_reply_func @staticmethod - def _summary_from_nested_chats( + def _get_chats_to_run( chat_queue: List[Dict[str, Any]], recipient: Agent, messages: Union[str, Callable], sender: Agent, config: Any - ) -> Tuple[bool, str]: + ) -> List[Dict[str, Any]]: """A simple chat reply function. This function initiate one or a sequence of chats between the "recipient" and the agents in the chat_queue. 
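For context on the `process_message_before_send` hook registered in the `RetrieveUserProxyAgent` hunk above: hooks attached at that point receive the outgoing message and must return it, possibly modified. A small illustrative sketch with a made-up hook, not part of this patch:

```python
from autogen import ConversableAgent

agent = ConversableAgent("worker", llm_config=False, default_auto_reply="ok")

def add_prefix_before_send(sender, message, recipient, silent):
    # Same signature as the new _check_update_context_before_send hook above.
    if isinstance(message, str):
        return f"[{sender.name}] {message}"
    return message

agent.register_hook(hookable_method="process_message_before_send", hook=add_prefix_before_send)
```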
@@ -406,22 +406,59 @@ def _summary_from_nested_chats( if message: current_c["message"] = message chat_to_run.append(current_c) + return chat_to_run + + @staticmethod + def _summary_from_nested_chats( + chat_queue: List[Dict[str, Any]], recipient: Agent, messages: Union[str, Callable], sender: Agent, config: Any + ) -> Tuple[bool, Union[str, None]]: + """A simple chat reply function. + This function initiate one or a sequence of chats between the "recipient" and the agents in the + chat_queue. + + It extracts and returns a summary from the nested chat based on the "summary_method" in each chat in chat_queue. + + Returns: + Tuple[bool, str]: A tuple where the first element indicates the completion of the chat, and the second element contains the summary of the last chat if any chats were initiated. + """ + chat_to_run = ConversableAgent._get_chats_to_run(chat_queue, recipient, messages, sender, config) if not chat_to_run: return True, None res = initiate_chats(chat_to_run) return True, res[-1].summary + @staticmethod + async def _a_summary_from_nested_chats( + chat_queue: List[Dict[str, Any]], recipient: Agent, messages: Union[str, Callable], sender: Agent, config: Any + ) -> Tuple[bool, Union[str, None]]: + """A simple chat reply function. + This function initiate one or a sequence of chats between the "recipient" and the agents in the + chat_queue. + + It extracts and returns a summary from the nested chat based on the "summary_method" in each chat in chat_queue. + + Returns: + Tuple[bool, str]: A tuple where the first element indicates the completion of the chat, and the second element contains the summary of the last chat if any chats were initiated. + """ + chat_to_run = ConversableAgent._get_chats_to_run(chat_queue, recipient, messages, sender, config) + if not chat_to_run: + return True, None + res = await a_initiate_chats(chat_to_run) + index_of_last_chat = chat_to_run[-1]["chat_id"] + return True, res[index_of_last_chat].summary + def register_nested_chats( self, chat_queue: List[Dict[str, Any]], trigger: Union[Type[Agent], str, Agent, Callable[[Agent], bool], List], reply_func_from_nested_chats: Union[str, Callable] = "summary_from_nested_chats", position: int = 2, + use_async: Union[bool, None] = None, **kwargs, ) -> None: """Register a nested chat reply function. Args: - chat_queue (list): a list of chat objects to be initiated. + chat_queue (list): a list of chat objects to be initiated. If use_async is used, then all messages in chat_queue must have a chat-id associated with them. trigger (Agent class, str, Agent instance, callable, or list): refer to `register_reply` for details. reply_func_from_nested_chats (Callable, str): the reply function for the nested chat. The function takes a chat_queue for nested chat, recipient agent, a list of messages, a sender agent and a config as input and returns a reply message. @@ -436,15 +473,33 @@ def reply_func_from_nested_chats( ) -> Tuple[bool, Union[str, Dict, None]]: ``` position (int): Ref to `register_reply` for details. Default to 2. It means we first check the termination and human reply, then check the registered nested chat reply. + use_async: Uses a_initiate_chats internally to start nested chats. If the original chat is initiated with a_initiate_chats, you may set this to true so nested chats do not run in sync. kwargs: Ref to `register_reply` for details. 
""" - if reply_func_from_nested_chats == "summary_from_nested_chats": - reply_func_from_nested_chats = self._summary_from_nested_chats - if not callable(reply_func_from_nested_chats): - raise ValueError("reply_func_from_nested_chats must be a callable") + if use_async: + for chat in chat_queue: + if chat.get("chat_id") is None: + raise ValueError("chat_id is required for async nested chats") + + if use_async: + if reply_func_from_nested_chats == "summary_from_nested_chats": + reply_func_from_nested_chats = self._a_summary_from_nested_chats + if not callable(reply_func_from_nested_chats) or not inspect.iscoroutinefunction( + reply_func_from_nested_chats + ): + raise ValueError("reply_func_from_nested_chats must be a callable and a coroutine") + + async def wrapped_reply_func(recipient, messages=None, sender=None, config=None): + return await reply_func_from_nested_chats(chat_queue, recipient, messages, sender, config) + + else: + if reply_func_from_nested_chats == "summary_from_nested_chats": + reply_func_from_nested_chats = self._summary_from_nested_chats + if not callable(reply_func_from_nested_chats): + raise ValueError("reply_func_from_nested_chats must be a callable") - def wrapped_reply_func(recipient, messages=None, sender=None, config=None): - return reply_func_from_nested_chats(chat_queue, recipient, messages, sender, config) + def wrapped_reply_func(recipient, messages=None, sender=None, config=None): + return reply_func_from_nested_chats(chat_queue, recipient, messages, sender, config) functools.update_wrapper(wrapped_reply_func, reply_func_from_nested_chats) @@ -454,7 +509,9 @@ def wrapped_reply_func(recipient, messages=None, sender=None, config=None): position, kwargs.get("config"), kwargs.get("reset_config"), - ignore_async_in_sync_chat=kwargs.get("ignore_async_in_sync_chat"), + ignore_async_in_sync_chat=( + not use_async if use_async is not None else kwargs.get("ignore_async_in_sync_chat") + ), ) @property @@ -564,7 +621,7 @@ def _assert_valid_name(name): raise ValueError(f"Invalid name: {name}. Name must be less than 64 characters.") return name - def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: Agent) -> bool: + def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: Agent, is_sending: bool) -> bool: """Append a message to the ChatCompletion conversation. If the message received is a string, it will be put in the "content" field of the new dictionary. @@ -576,6 +633,7 @@ def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: message (dict or str): message to be appended to the ChatCompletion conversation. role (str): role of the message, can be "assistant" or "function". conversation_id (Agent): id of the conversation, should be the recipient or sender. + is_sending (bool): If the agent (aka self) is sending to the conversation_id agent, otherwise receiving. Returns: bool: whether the message is appended to the ChatCompletion conversation. @@ -605,7 +663,15 @@ def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: if oai_message.get("function_call", False) or oai_message.get("tool_calls", False): oai_message["role"] = "assistant" # only messages with role 'assistant' can have a function call. 
+ elif "name" not in oai_message: + # If we don't have a name field, append it + if is_sending: + oai_message["name"] = self.name + else: + oai_message["name"] = conversation_id.name + self._oai_messages[conversation_id].append(oai_message) + return True def _process_message_before_send( @@ -661,7 +727,7 @@ def send( message = self._process_message_before_send(message, recipient, ConversableAgent._is_silent(self, silent)) # When the agent composes and sends the message, the role of the message is "assistant" # unless it's "function". - valid = self._append_oai_message(message, "assistant", recipient) + valid = self._append_oai_message(message, "assistant", recipient, is_sending=True) if valid: recipient.receive(message, self, request_reply, silent) else: @@ -711,7 +777,7 @@ async def a_send( message = self._process_message_before_send(message, recipient, ConversableAgent._is_silent(self, silent)) # When the agent composes and sends the message, the role of the message is "assistant" # unless it's "function". - valid = self._append_oai_message(message, "assistant", recipient) + valid = self._append_oai_message(message, "assistant", recipient, is_sending=True) if valid: await recipient.a_receive(message, self, request_reply, silent) else: @@ -782,7 +848,7 @@ def _print_received_message(self, message: Union[Dict, str], sender: Agent): def _process_received_message(self, message: Union[Dict, str], sender: Agent, silent: bool): # When the agent receives a message, the role of the message is "user". (If 'role' exists and is 'function', it will remain unchanged.) - valid = self._append_oai_message(message, "user", sender) + valid = self._append_oai_message(message, "user", sender, is_sending=False) if logging_enabled(): log_event(self, "received_message", message=message, sender=sender.name, valid=valid) diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 48f11d526cc..2ebdf95b7d3 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -5,7 +5,7 @@ import re import sys from dataclasses import dataclass, field -from typing import Callable, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union from ..code_utils import content_str from ..exception_utils import AgentNameConflict, NoEligibleSpeaker, UndefinedNextAgent @@ -17,6 +17,12 @@ from .chat import ChatResult from .conversable_agent import ConversableAgent +try: + # Non-core module + from .contrib.capabilities import transform_messages +except ImportError: + transform_messages = None + logger = logging.getLogger(__name__) @@ -76,6 +82,8 @@ def custom_speaker_selection_func( of times until a single agent is returned or it exhausts the maximum attempts. Applies only to "auto" speaker selection method. Default is 2. + - select_speaker_transform_messages: (optional) the message transformations to apply to the nested select speaker agent-to-agent chat messages. + Takes a TransformMessages object, defaults to None and is only utilised when the speaker selection method is "auto". - select_speaker_auto_verbose: whether to output the select speaker responses and selections If set to True, the outputs from the two agents in the nested select speaker chat will be output, along with whether the responses were successful, or not, in selecting an agent @@ -132,6 +140,7 @@ def custom_speaker_selection_func( The names are case-sensitive and should not be abbreviated or changed. The only names that are accepted are {agentlist}. 
Respond with ONLY the name of the speaker and DO NOT provide a reason.""" + select_speaker_transform_messages: Optional[Any] = None select_speaker_auto_verbose: Optional[bool] = False role_for_select_speaker_messages: Optional[str] = "system" @@ -249,6 +258,20 @@ def __post_init__(self): elif self.max_retries_for_selecting_speaker < 0: raise ValueError("max_retries_for_selecting_speaker must be greater than or equal to zero") + # Load message transforms here (load once for the Group Chat so we don't have to re-initiate it and it maintains the cache across subsequent select speaker calls) + self._speaker_selection_transforms = None + if self.select_speaker_transform_messages is not None: + if transform_messages is not None: + if isinstance(self.select_speaker_transform_messages, transform_messages.TransformMessages): + self._speaker_selection_transforms = self.select_speaker_transform_messages + else: + raise ValueError("select_speaker_transform_messages must be None or MessageTransforms.") + else: + logger.warning( + "TransformMessages could not be loaded, the 'select_speaker_transform_messages' transform" + "will not apply." + ) + # Validate select_speaker_auto_verbose if self.select_speaker_auto_verbose is None or not isinstance(self.select_speaker_auto_verbose, bool): raise ValueError("select_speaker_auto_verbose cannot be None or non-bool") @@ -649,11 +672,16 @@ def validate_speaker_name(recipient, messages, sender, config) -> Tuple[bool, Un if self.select_speaker_prompt_template is not None: start_message = { "content": self.select_speaker_prompt(agents), + "name": "checking_agent", "override_role": self.role_for_select_speaker_messages, } else: start_message = messages[-1] + # Add the message transforms, if any, to the speaker selection agent + if self._speaker_selection_transforms is not None: + self._speaker_selection_transforms.add_to_agent(speaker_selection_agent) + # Run the speaker selection chat result = checking_agent.initiate_chat( speaker_selection_agent, @@ -748,6 +776,10 @@ def validate_speaker_name(recipient, messages, sender, config) -> Tuple[bool, Un else: start_message = messages[-1] + # Add the message transforms, if any, to the speaker selection agent + if self._speaker_selection_transforms is not None: + self._speaker_selection_transforms.add_to_agent(speaker_selection_agent) + # Run the speaker selection chat result = await checking_agent.a_initiate_chat( speaker_selection_agent, @@ -813,6 +845,7 @@ def _validate_speaker_name( return True, { "content": self.select_speaker_auto_multiple_template.format(agentlist=agentlist), + "name": "checking_agent", "override_role": self.role_for_select_speaker_messages, } else: @@ -842,6 +875,7 @@ def _validate_speaker_name( return True, { "content": self.select_speaker_auto_none_template.format(agentlist=agentlist), + "name": "checking_agent", "override_role": self.role_for_select_speaker_messages, } else: @@ -965,6 +999,7 @@ def __init__( # Store groupchat self._groupchat = groupchat + self._last_speaker = None self._silent = silent # Order of register_reply is important. @@ -1006,6 +1041,53 @@ def _prepare_chat( if (recipient != agent or prepare_recipient) and isinstance(agent, ConversableAgent): agent._prepare_chat(self, clear_history, False, reply_at_receive) + @property + def last_speaker(self) -> Agent: + """Return the agent who sent the last message to group chat manager. 
+ + In a group chat, an agent will always send a message to the group chat manager, and the group chat manager will + send the message to all other agents in the group chat. So, when an agent receives a message, it will always be + from the group chat manager. With this property, the agent receiving the message can know who actually sent the + message. + + Example: + ```python + from autogen import ConversableAgent + from autogen import GroupChat, GroupChatManager + + + def print_messages(recipient, messages, sender, config): + # Print the message immediately + print( + f"Sender: {sender.name} | Recipient: {recipient.name} | Message: {messages[-1].get('content')}" + ) + print(f"Real Sender: {sender.last_speaker.name}") + assert sender.last_speaker.name in messages[-1].get("content") + return False, None # Required to ensure the agent communication flow continues + + + agent_a = ConversableAgent("agent A", default_auto_reply="I'm agent A.") + agent_b = ConversableAgent("agent B", default_auto_reply="I'm agent B.") + agent_c = ConversableAgent("agent C", default_auto_reply="I'm agent C.") + for agent in [agent_a, agent_b, agent_c]: + agent.register_reply( + [ConversableAgent, None], reply_func=print_messages, config=None + ) + group_chat = GroupChat( + [agent_a, agent_b, agent_c], + messages=[], + max_round=6, + speaker_selection_method="random", + allow_repeat_speaker=True, + ) + chat_manager = GroupChatManager(group_chat) + groupchat_result = agent_a.initiate_chat( + chat_manager, message="Hi, there, I'm agent A." + ) + ``` + """ + return self._last_speaker + def run_chat( self, messages: Optional[List[Dict]] = None, @@ -1034,6 +1116,7 @@ def run_chat( a.previous_cache = a.client_cache a.client_cache = self.client_cache for i in range(groupchat.max_round): + self._last_speaker = speaker groupchat.append(message, speaker) # broadcast the message to all agents except the speaker for agent in groupchat.agents: @@ -1212,11 +1295,10 @@ def resume( if not message_speaker_agent and message["name"] == self.name: message_speaker_agent = self - # Add previous messages to each agent (except their own messages and the last message, as we'll kick off the conversation with it) + # Add previous messages to each agent (except the last message, as we'll kick off the conversation with it) if i != len(messages) - 1: for agent in self._groupchat.agents: - if agent.name != message["name"]: - self.send(message, self._groupchat.agent_by_name(agent.name), request_reply=False, silent=True) + self.send(message, self._groupchat.agent_by_name(agent.name), request_reply=False, silent=True) # Add previous message to the new groupchat, if it's an admin message the name may not match so add the message directly if message_speaker_agent: @@ -1258,7 +1340,7 @@ def resume( async def a_resume( self, messages: Union[List[Dict], str], - remove_termination_string: Union[str, Callable[[str], str]], + remove_termination_string: Union[str, Callable[[str], str]] = None, silent: Optional[bool] = False, ) -> Tuple[ConversableAgent, Dict]: """Resumes a group chat using the previous messages as a starting point, asynchronously. 
Requires the agents, group chat, and group chat manager to be established diff --git a/autogen/coding/func_with_reqs.py b/autogen/coding/func_with_reqs.py index 6f199573822..f255f1df017 100644 --- a/autogen/coding/func_with_reqs.py +++ b/autogen/coding/func_with_reqs.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from importlib.abc import SourceLoader from textwrap import dedent, indent -from typing import Any, Callable, Generic, List, TypeVar, Union +from typing import Any, Callable, Generic, List, Set, TypeVar, Union from typing_extensions import ParamSpec @@ -159,12 +159,12 @@ def _build_python_functions_file( funcs: List[Union[FunctionWithRequirements[Any, P], Callable[..., Any], FunctionWithRequirementsStr]] ) -> str: # First collect all global imports - global_imports = set() + global_imports: Set[str] = set() for func in funcs: if isinstance(func, (FunctionWithRequirements, FunctionWithRequirementsStr)): - global_imports.update(func.global_imports) + global_imports.update(map(_import_to_str, func.global_imports)) - content = "\n".join(map(_import_to_str, global_imports)) + "\n\n" + content = "\n".join(global_imports) + "\n\n" for func in funcs: content += _to_code(func) + "\n\n" diff --git a/autogen/logger/file_logger.py b/autogen/logger/file_logger.py index 0e9e14f3333..20df88101d1 100644 --- a/autogen/logger/file_logger.py +++ b/autogen/logger/file_logger.py @@ -18,6 +18,7 @@ if TYPE_CHECKING: from autogen import Agent, ConversableAgent, OpenAIWrapper from autogen.oai.anthropic import AnthropicClient + from autogen.oai.bedrock import BedrockClient from autogen.oai.cohere import CohereClient from autogen.oai.gemini import GeminiClient from autogen.oai.groq import GroqClient @@ -217,6 +218,7 @@ def log_new_client( | GroqClient | CohereClient | OllamaClient + | BedrockClient ), wrapper: OpenAIWrapper, init_args: Dict[str, Any], diff --git a/autogen/logger/sqlite_logger.py b/autogen/logger/sqlite_logger.py index 3bebffc126d..592af7c9389 100644 --- a/autogen/logger/sqlite_logger.py +++ b/autogen/logger/sqlite_logger.py @@ -19,6 +19,7 @@ if TYPE_CHECKING: from autogen import Agent, ConversableAgent, OpenAIWrapper from autogen.oai.anthropic import AnthropicClient + from autogen.oai.bedrock import BedrockClient from autogen.oai.cohere import CohereClient from autogen.oai.gemini import GeminiClient from autogen.oai.groq import GroqClient @@ -404,6 +405,7 @@ def log_new_client( GroqClient, CohereClient, OllamaClient, + BedrockClient, ], wrapper: OpenAIWrapper, init_args: Dict[str, Any], diff --git a/autogen/oai/bedrock.py b/autogen/oai/bedrock.py new file mode 100644 index 00000000000..7894781e3ee --- /dev/null +++ b/autogen/oai/bedrock.py @@ -0,0 +1,606 @@ +""" +Create a compatible client for the Amazon Bedrock Converse API. + +Example usage: +Install the `boto3` package by running `pip install --upgrade boto3`. 
+- https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html + +import autogen + +config_list = [ + { + "api_type": "bedrock", + "model": "meta.llama3-1-8b-instruct-v1:0", + "aws_region": "us-west-2", + "aws_access_key": "", + "aws_secret_key": "", + "price" : [0.003, 0.015] + } +] + +assistant = autogen.AssistantAgent("assistant", llm_config={"config_list": config_list}) + +""" + +from __future__ import annotations + +import base64 +import json +import os +import re +import time +import warnings +from typing import Any, Dict, List, Literal, Tuple + +import boto3 +import requests +from botocore.config import Config +from openai.types.chat import ChatCompletion, ChatCompletionMessageToolCall +from openai.types.chat.chat_completion import ChatCompletionMessage, Choice +from openai.types.completion_usage import CompletionUsage + +from autogen.oai.client_utils import validate_parameter + + +class BedrockClient: + """Client for Amazon's Bedrock Converse API.""" + + _retries = 5 + + def __init__(self, **kwargs: Any): + """ + Initialises BedrockClient for Amazon's Bedrock Converse API + """ + self._aws_access_key = kwargs.get("aws_access_key", None) + self._aws_secret_key = kwargs.get("aws_secret_key", None) + self._aws_session_token = kwargs.get("aws_session_token", None) + self._aws_region = kwargs.get("aws_region", None) + self._aws_profile_name = kwargs.get("aws_profile_name", None) + + if not self._aws_access_key: + self._aws_access_key = os.getenv("AWS_ACCESS_KEY") + + if not self._aws_secret_key: + self._aws_secret_key = os.getenv("AWS_SECRET_KEY") + + if not self._aws_session_token: + self._aws_session_token = os.getenv("AWS_SESSION_TOKEN") + + if not self._aws_region: + self._aws_region = os.getenv("AWS_REGION") + + if self._aws_region is None: + raise ValueError("Region is required to use the Amazon Bedrock API.") + + # Initialize Bedrock client, session, and runtime + bedrock_config = Config( + region_name=self._aws_region, + signature_version="v4", + retries={"max_attempts": self._retries, "mode": "standard"}, + ) + + session = boto3.Session( + aws_access_key_id=self._aws_access_key, + aws_secret_access_key=self._aws_secret_key, + aws_session_token=self._aws_session_token, + profile_name=self._aws_profile_name, + ) + + self.bedrock_runtime = session.client(service_name="bedrock-runtime", config=bedrock_config) + + def message_retrieval(self, response): + """Retrieve the messages from the response.""" + return [choice.message for choice in response.choices] + + def parse_custom_params(self, params: Dict[str, Any]): + """ + Parses custom parameters for logic in this client class + """ + + # Should we separate system messages into its own request parameter, default is True + # This is required because not all models support a system prompt (e.g. Mistral Instruct). + self._supports_system_prompts = params.get("supports_system_prompts", True) + + def parse_params(self, params: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: + """ + Loads the valid parameters required to invoke Bedrock Converse + Returns a tuple of (base_params, additional_params) + """ + + base_params = {} + additional_params = {} + + # Amazon Bedrock base model IDs are here: + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html + self._model_id = params.get("model", None) + assert self._model_id, "Please provide the 'model` in the config_list to use Amazon Bedrock" + + # Parameters vary based on the model used. 
+ # As we won't cater for all models and parameters, it's the developer's + # responsibility to implement the parameters and they will only be + # included if the developer has it in the config. + # + # Important: + # No defaults will be used (as they can vary per model) + # No ranges will be used (as they can vary) + # We will cover all the main parameters but there may be others + # that need to be added later + # + # Here are some pages that show the parameters available for different models + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-text.html + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-text-completion.html + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta.html + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral-chat-completion.html + + # Here are the possible "base" parameters and their suitable types + base_parameters = [["temperature", (float, int)], ["topP", (float, int)], ["maxTokens", (int)]] + + for param_name, suitable_types in base_parameters: + if param_name in params: + base_params[param_name] = validate_parameter( + params, param_name, suitable_types, False, None, None, None + ) + + # Here are the possible "model-specific" parameters and their suitable types, known as additional parameters + additional_parameters = [ + ["top_p", (float, int)], + ["top_k", (int)], + ["k", (int)], + ["seed", (int)], + ] + + for param_name, suitable_types in additional_parameters: + if param_name in params: + additional_params[param_name] = validate_parameter( + params, param_name, suitable_types, False, None, None, None + ) + + # Streaming + if "stream" in params: + self._streaming = params["stream"] + else: + self._streaming = False + + # For this release we will not support streaming as many models do not support streaming with tool use + if self._streaming: + warnings.warn( + "Streaming is not currently supported, streaming will be disabled.", + UserWarning, + ) + self._streaming = False + + return base_params, additional_params + + def create(self, params): + """Run Amazon Bedrock inference and return AutoGen response""" + + # Set custom client class settings + self.parse_custom_params(params) + + # Parse the inference parameters + base_params, additional_params = self.parse_params(params) + + has_tools = "tools" in params + messages = oai_messages_to_bedrock_messages(params["messages"], has_tools, self._supports_system_prompts) + + if self._supports_system_prompts: + system_messages = extract_system_messages(params["messages"]) + + tool_config = format_tools(params["tools"] if has_tools else []) + + request_args = {"messages": messages, "modelId": self._model_id} + + # Base and additional args + if len(base_params) > 0: + request_args["inferenceConfig"] = base_params + + if len(additional_params) > 0: + request_args["additionalModelRequestFields"] = additional_params + + if self._supports_system_prompts: + request_args["system"] = system_messages + + if len(tool_config["tools"]) > 0: + request_args["toolConfig"] = tool_config + + try: + response = self.bedrock_runtime.converse( + **request_args, + ) + except Exception as e: + raise RuntimeError(f"Failed to get response from Bedrock: {e}") + + if response is None: + raise RuntimeError(f"Failed to get response from Bedrock after retrying {self._retries} times.") + + finish_reason = 
convert_stop_reason_to_finish_reason(response["stopReason"]) + response_message = response["output"]["message"] + + if finish_reason == "tool_calls": + tool_calls = format_tool_calls(response_message["content"]) + # text = "" + else: + tool_calls = None + + text = "" + for content in response_message["content"]: + if "text" in content: + text = content["text"] + # NOTE: other types of output may be dealt with here + + message = ChatCompletionMessage(role="assistant", content=text, tool_calls=tool_calls) + + response_usage = response["usage"] + usage = CompletionUsage( + prompt_tokens=response_usage["inputTokens"], + completion_tokens=response_usage["outputTokens"], + total_tokens=response_usage["totalTokens"], + ) + + return ChatCompletion( + id=response["ResponseMetadata"]["RequestId"], + choices=[Choice(finish_reason=finish_reason, index=0, message=message)], + created=int(time.time()), + model=self._model_id, + object="chat.completion", + usage=usage, + ) + + def cost(self, response: ChatCompletion) -> float: + """Calculate the cost of the response.""" + return calculate_cost(response.usage.prompt_tokens, response.usage.completion_tokens, response.model) + + @staticmethod + def get_usage(response) -> Dict: + """Get the usage of tokens and their cost information.""" + return { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens, + "cost": response.cost, + "model": response.model, + } + + +def extract_system_messages(messages: List[dict]) -> List: + """Extract the system messages from the list of messages. + + Args: + messages (list[dict]): List of messages. + + Returns: + List[SystemMessage]: List of System messages. + """ + + """ + system_messages = [message.get("content")[0]["text"] for message in messages if message.get("role") == "system"] + return system_messages # ''.join(system_messages) + """ + + for message in messages: + if message.get("role") == "system": + if isinstance(message["content"], str): + return [{"text": message.get("content")}] + else: + return [{"text": message.get("content")[0]["text"]}] + return [] + + +def oai_messages_to_bedrock_messages( + messages: List[Dict[str, Any]], has_tools: bool, supports_system_prompts: bool +) -> List[Dict]: + """ + Convert messages from OAI format to Bedrock format. + We correct for any specific role orders and types, etc. + AWS Bedrock requires messages to alternate between user and assistant roles. This function ensures that the messages + are in the correct order and format for Bedrock by inserting "Please continue" messages as needed. + This is the same method as the one in the Autogen Anthropic client + """ + + # Track whether we have tools passed in. If not, tool use / result messages should be converted to text messages. + # Bedrock requires a tools parameter with the tools listed, if there are other messages with tool use or tool results. + # This can occur when we don't need tool calling, such as for group chat speaker selection + + # Convert messages to Bedrock compliant format + + # Take out system messages if the model supports it, otherwise leave them in. 
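A small illustration, not part of the patch, of the user/assistant alternation rule described above; the messages are made up and the expected output follows from the conversion code in this hunk:

```python
from autogen.oai.bedrock import oai_messages_to_bedrock_messages

oai_messages = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "user", "content": "Answer briefly."},
]
bedrock_messages = oai_messages_to_bedrock_messages(
    oai_messages, has_tools=False, supports_system_prompts=True
)
# Roles now alternate user -> assistant -> user, with text wrapped in content parts:
# [{"role": "user", "content": [{"text": "What is 2 + 2?"}]},
#  {"role": "assistant", "content": [{"text": "Please continue."}]},
#  {"role": "user", "content": [{"text": "Answer briefly."}]}]
```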
+ if supports_system_prompts: + messages = [x for x in messages if not x["role"] == "system"] + else: + # Replace role="system" with role="user" + for msg in messages: + if msg["role"] == "system": + msg["role"] = "user" + + processed_messages = [] + + # Used to interweave user messages to ensure user/assistant alternating + user_continue_message = {"content": [{"text": "Please continue."}], "role": "user"} + assistant_continue_message = { + "content": [{"text": "Please continue."}], + "role": "assistant", + } + + tool_use_messages = 0 + tool_result_messages = 0 + last_tool_use_index = -1 + last_tool_result_index = -1 + # user_role_index = 0 if supports_system_prompts else 1 # If system prompts are supported, messages start with user, otherwise they'll be the second message + for message in messages: + # New messages will be added here, manage role alternations + expected_role = "user" if len(processed_messages) % 2 == 0 else "assistant" + + if "tool_calls" in message: + # Map the tool call options to Bedrock's format + tool_uses = [] + tool_names = [] + for tool_call in message["tool_calls"]: + tool_uses.append( + { + "toolUse": { + "toolUseId": tool_call["id"], + "name": tool_call["function"]["name"], + "input": json.loads(tool_call["function"]["arguments"]), + } + } + ) + if has_tools: + tool_use_messages += 1 + tool_names.append(tool_call["function"]["name"]) + + if expected_role == "user": + # Insert an extra user message as we will append an assistant message + processed_messages.append(user_continue_message) + + if has_tools: + processed_messages.append({"role": "assistant", "content": tool_uses}) + last_tool_use_index = len(processed_messages) - 1 + else: + # Not using tools, so put in a plain text message + processed_messages.append( + { + "role": "assistant", + "content": [ + {"text": f"Some internal function(s) that could be used: [{', '.join(tool_names)}]"} + ], + } + ) + elif "tool_call_id" in message: + if has_tools: + # Map the tool usage call to tool_result for Bedrock + tool_result = { + "toolResult": { + "toolUseId": message["tool_call_id"], + "content": [{"text": message["content"]}], + } + } + + # If the previous message also had a tool_result, add it to that + # Otherwise append a new message + if last_tool_result_index == len(processed_messages) - 1: + processed_messages[-1]["content"].append(tool_result) + else: + if expected_role == "assistant": + # Insert an extra assistant message as we will append a user message + processed_messages.append(assistant_continue_message) + + processed_messages.append({"role": "user", "content": [tool_result]}) + last_tool_result_index = len(processed_messages) - 1 + + tool_result_messages += 1 + else: + # Not using tools, so put in a plain text message + processed_messages.append( + { + "role": "user", + "content": [{"text": f"Running the function returned: {message['content']}"}], + } + ) + elif message["content"] == "": + # Ignoring empty messages + pass + else: + if expected_role != message["role"] and not (len(processed_messages) == 0 and message["role"] == "system"): + # Inserting the alternating continue message (ignore if it's the first message and a system message) + processed_messages.append( + user_continue_message if expected_role == "user" else assistant_continue_message + ) + + processed_messages.append( + { + "role": message["role"], + "content": parse_content_parts(message=message), + } + ) + + # We'll replace the last tool_use if there's no tool_result (occurs if we finish the conversation before running the function) 
+ if has_tools and tool_use_messages != tool_result_messages: + processed_messages[last_tool_use_index] = assistant_continue_message + + # name is not a valid field on messages + for message in processed_messages: + if "name" in message: + message.pop("name", None) + + # Note: When using reflection_with_llm we may end up with an "assistant" message as the last message and that may cause a blank response + # So, if the last role is not user, add a 'user' continue message at the end + if processed_messages[-1]["role"] != "user": + processed_messages.append(user_continue_message) + + return processed_messages + + +def parse_content_parts( + message: Dict[str, Any], +) -> List[dict]: + content: str | List[Dict[str, Any]] = message.get("content") + if isinstance(content, str): + return [ + { + "text": content, + } + ] + content_parts = [] + for part in content: + # part_content: Dict = part.get("content") + if "text" in part: # part_content: + content_parts.append( + { + "text": part.get("text"), + } + ) + elif "image_url" in part: # part_content: + image_data, content_type = parse_image(part.get("image_url").get("url")) + content_parts.append( + { + "image": { + "format": content_type[6:], # image/ + "source": {"bytes": image_data}, + }, + } + ) + else: + # Ignore.. + continue + return content_parts + + +def parse_image(image_url: str) -> Tuple[bytes, str]: + """Try to get the raw data from an image url. + + Ref: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ImageSource.html + returns a tuple of (Image Data, Content Type) + """ + pattern = r"^data:(image/[a-z]*);base64,\s*" + content_type = re.search(pattern, image_url) + # if already base64 encoded. + # Only supports 'image/jpeg', 'image/png', 'image/gif' or 'image/webp' + if content_type: + image_data = re.sub(pattern, "", image_url) + return base64.b64decode(image_data), content_type.group(1) + + # Send a request to the image URL + response = requests.get(image_url) + # Check if the request was successful + if response.status_code == 200: + + content_type = response.headers.get("Content-Type") + if not content_type.startswith("image"): + content_type = "image/jpeg" + # Get the image content + image_content = response.content + return image_content, content_type + else: + raise RuntimeError("Unable to access the image url") + + +def format_tools(tools: List[Dict[str, Any]]) -> Dict[Literal["tools"], List[Dict[str, Any]]]: + converted_schema = {"tools": []} + + for tool in tools: + if tool["type"] == "function": + function = tool["function"] + converted_tool = { + "toolSpec": { + "name": function["name"], + "description": function["description"], + "inputSchema": {"json": {"type": "object", "properties": {}, "required": []}}, + } + } + + for prop_name, prop_details in function["parameters"]["properties"].items(): + converted_tool["toolSpec"]["inputSchema"]["json"]["properties"][prop_name] = { + "type": prop_details["type"], + "description": prop_details.get("description", ""), + } + if "enum" in prop_details: + converted_tool["toolSpec"]["inputSchema"]["json"]["properties"][prop_name]["enum"] = prop_details[ + "enum" + ] + if "default" in prop_details: + converted_tool["toolSpec"]["inputSchema"]["json"]["properties"][prop_name]["default"] = ( + prop_details["default"] + ) + + if "required" in function["parameters"]: + converted_tool["toolSpec"]["inputSchema"]["json"]["required"] = function["parameters"]["required"] + + converted_schema["tools"].append(converted_tool) + + return converted_schema + + +def 
format_tool_calls(content): + """Converts Converse API response tool calls to AutoGen format""" + tool_calls = [] + for tool_request in content: + if "toolUse" in tool_request: + tool = tool_request["toolUse"] + + tool_calls.append( + ChatCompletionMessageToolCall( + id=tool["toolUseId"], + function={ + "name": tool["name"], + "arguments": json.dumps(tool["input"]), + }, + type="function", + ) + ) + return tool_calls + + +def convert_stop_reason_to_finish_reason( + stop_reason: str, +) -> Literal["stop", "length", "tool_calls", "content_filter"]: + """ + Converts Bedrock finish reasons to our finish reasons, according to OpenAI: + + - stop: if the model hit a natural stop point or a provided stop sequence, + - length: if the maximum number of tokens specified in the request was reached, + - content_filter: if content was omitted due to a flag from our content filters, + - tool_calls: if the model called a tool + """ + if stop_reason: + finish_reason_mapping = { + "tool_use": "tool_calls", + "finished": "stop", + "end_turn": "stop", + "max_tokens": "length", + "stop_sequence": "stop", + "complete": "stop", + "content_filtered": "content_filter", + } + return finish_reason_mapping.get(stop_reason.lower(), stop_reason.lower()) + + warnings.warn(f"Unsupported stop reason: {stop_reason}", UserWarning) + return None + + +# NOTE: As this will be quite dynamic, it's expected that the developer will use the "price" parameter in their config +# These may be removed. +PRICES_PER_K_TOKENS = { + "meta.llama3-8b-instruct-v1:0": (0.0003, 0.0006), + "meta.llama3-70b-instruct-v1:0": (0.00265, 0.0035), + "mistral.mistral-7b-instruct-v0:2": (0.00015, 0.0002), + "mistral.mixtral-8x7b-instruct-v0:1": (0.00045, 0.0007), + "mistral.mistral-large-2402-v1:0": (0.004, 0.012), + "mistral.mistral-small-2402-v1:0": (0.001, 0.003), +} + + +def calculate_cost(input_tokens: int, output_tokens: int, model_id: str) -> float: + """Calculate the cost of the completion using the Bedrock pricing.""" + + if model_id in PRICES_PER_K_TOKENS: + input_cost_per_k, output_cost_per_k = PRICES_PER_K_TOKENS[model_id] + input_cost = (input_tokens / 1000) * input_cost_per_k + output_cost = (output_tokens / 1000) * output_cost_per_k + return input_cost + output_cost + else: + warnings.warn( + f'Cannot get the costs for {model_id}. The cost will be 0. In your config_list, add field {{"price" : [prompt_price_per_1k, completion_token_price_per_1k]}} for customized pricing.', + UserWarning, + ) + return 0 diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 0527ce2bd89..d678741c158 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -91,6 +91,13 @@ except ImportError as e: ollama_import_exception = e +try: + from autogen.oai.bedrock import BedrockClient + + bedrock_import_exception: Optional[ImportError] = None +except ImportError as e: + bedrock_import_exception = e + logger = logging.getLogger(__name__) if not logger.handlers: # Add the console handler. 
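A quick usage check for the Bedrock pricing helper above, not part of the patch; the token counts are arbitrary and the model ID is one of the entries in `PRICES_PER_K_TOKENS`:

```python
from autogen.oai.bedrock import calculate_cost

cost = calculate_cost(input_tokens=1200, output_tokens=300, model_id="meta.llama3-8b-instruct-v1:0")
# (1200 / 1000) * 0.0003 + (300 / 1000) * 0.0006 = 0.00054
print(f"${cost:.5f}")
```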
@@ -464,10 +471,13 @@ def _configure_azure_openai(self, config: Dict[str, Any], openai_config: Dict[st def _configure_openai_config_for_bedrock(self, config: Dict[str, Any], openai_config: Dict[str, Any]) -> None: """Update openai_config with AWS credentials from config.""" required_keys = ["aws_access_key", "aws_secret_key", "aws_region"] - + optional_keys = ["aws_session_token", "aws_profile_name"] for key in required_keys: if key in config: openai_config[key] = config[key] + for key in optional_keys: + if key in config: + openai_config[key] = config[key] def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[str, Any]) -> None: """Create a client with the given config to override openai_config, @@ -523,7 +533,7 @@ def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[s self._clients.append(client) elif api_type is not None and api_type.startswith("cohere"): if cohere_import_exception: - raise ImportError("Please install `cohere` to use the Groq API.") + raise ImportError("Please install `cohere` to use the Cohere API.") client = CohereClient(**openai_config) self._clients.append(client) elif api_type is not None and api_type.startswith("ollama"): @@ -531,6 +541,12 @@ def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[s raise ImportError("Please install `ollama` to use the Ollama API.") client = OllamaClient(**openai_config) self._clients.append(client) + elif api_type is not None and api_type.startswith("bedrock"): + self._configure_openai_config_for_bedrock(config, openai_config) + if bedrock_import_exception: + raise ImportError("Please install `boto3` to use the Amazon Bedrock API.") + client = BedrockClient(**openai_config) + self._clients.append(client) else: client = OpenAI(**openai_config) self._clients.append(OpenAIClient(client)) diff --git a/autogen/oai/cohere.py b/autogen/oai/cohere.py index 35b7ac97c4f..3d38d86425f 100644 --- a/autogen/oai/cohere.py +++ b/autogen/oai/cohere.py @@ -6,6 +6,7 @@ "api_type": "cohere", "model": "command-r-plus", "api_key": os.environ.get("COHERE_API_KEY") + "client_name": "autogen-cohere", # Optional parameter } ]} @@ -144,7 +145,7 @@ def parse_params(self, params: Dict[str, Any]) -> Dict[str, Any]: def create(self, params: Dict) -> ChatCompletion: messages = params.get("messages", []) - + client_name = params.get("client_name") or "autogen-cohere" # Parse parameters to the Cohere API's parameters cohere_params = self.parse_params(params) @@ -156,7 +157,7 @@ def create(self, params: Dict) -> ChatCompletion: cohere_params["preamble"] = preamble # We use chat model by default - client = Cohere(api_key=self.api_key) + client = Cohere(api_key=self.api_key, client_name=client_name) # Token counts will be returned prompt_tokens = 0 @@ -285,6 +286,23 @@ def create(self, params: Dict) -> ChatCompletion: return response_oai +def extract_to_cohere_tool_results(tool_call_id: str, content_output: str, all_tool_calls) -> List[Dict[str, Any]]: + temp_tool_results = [] + + for tool_call in all_tool_calls: + if tool_call["id"] == tool_call_id: + + call = { + "name": tool_call["function"]["name"], + "parameters": json.loads( + tool_call["function"]["arguments"] if not tool_call["function"]["arguments"] == "" else "{}" + ), + } + output = [{"value": content_output}] + temp_tool_results.append(ToolResult(call=call, outputs=output)) + return temp_tool_results + + def oai_messages_to_cohere_messages( messages: list[Dict[str, Any]], params: Dict[str, Any], cohere_params: Dict[str, Any] ) -> 
tuple[list[dict[str, Any]], str, str]: @@ -352,7 +370,8 @@ def oai_messages_to_cohere_messages( # 'content' field renamed to 'message' # tools go into tools parameter # tool_results go into tool_results parameter - for message in messages: + messages_length = len(messages) + for index, message in enumerate(messages): if "role" in message and message["role"] == "system": # System message @@ -369,34 +388,34 @@ def oai_messages_to_cohere_messages( new_message = { "role": "CHATBOT", "message": message["content"], - # Not including tools in this message, may need to. Testing required. + "tool_calls": [ + { + "name": tool_call_.get("function", {}).get("name"), + "parameters": json.loads(tool_call_.get("function", {}).get("arguments") or "null"), + } + for tool_call_ in message["tool_calls"] + ], } cohere_messages.append(new_message) elif "role" in message and message["role"] == "tool": - if "tool_call_id" in message: - # Convert the tool call to a result + if not (tool_call_id := message.get("tool_call_id")): + continue + + # Convert the tool call to a result + content_output = message["content"] + tool_results_chat_turn = extract_to_cohere_tool_results(tool_call_id, content_output, tool_calls) + if (index == messages_length - 1) or (messages[index + 1].get("role", "").lower() in ("user", "tool")): + # If the tool call is the last message or the next message is a user/tool message, this is a recent tool call. + # So, we pass it into tool_results. + tool_results.extend(tool_results_chat_turn) + continue - tool_call_id = message["tool_call_id"] - content_output = message["content"] - - # Find the original tool - for tool_call in tool_calls: - if tool_call["id"] == tool_call_id: - - call = { - "name": tool_call["function"]["name"], - "parameters": json.loads( - tool_call["function"]["arguments"] - if not tool_call["function"]["arguments"] == "" - else "{}" - ), - } - output = [{"value": content_output}] - - tool_results.append(ToolResult(call=call, outputs=output)) + else: + # If its not the current tool call, we pass it as a tool message in the chat history. + new_message = {"role": "TOOL", "tool_results": tool_results_chat_turn} + cohere_messages.append(new_message) - break elif "content" in message and isinstance(message["content"], str): # Standard text message new_message = { @@ -416,7 +435,7 @@ def oai_messages_to_cohere_messages( # If we're adding tool_results, like we are, the last message can't be a USER message # So, we add a CHATBOT 'continue' message, if so. # Changed key from "content" to "message" (jaygdesai/autogen_Jay) - if cohere_messages[-1]["role"] == "USER": + if cohere_messages[-1]["role"].lower() == "user": cohere_messages.append({"role": "CHATBOT", "message": "Please continue."}) # We return a blank message when we have tool results diff --git a/autogen/oai/mistral.py b/autogen/oai/mistral.py index 8017e353632..10d0f926ffb 100644 --- a/autogen/oai/mistral.py +++ b/autogen/oai/mistral.py @@ -15,28 +15,32 @@ Resources: - https://docs.mistral.ai/getting-started/quickstart/ -""" -# Important notes when using the Mistral.AI API: -# The first system message can greatly affect whether the model returns a tool call, including text that references the ability to use functions will help. -# Changing the role on the first system message to 'user' improved the chances of the model recommending a tool call. 
+NOTE: Requires mistralai package version >= 1.0.1 +""" import inspect import json import os import time import warnings -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Union # Mistral libraries # pip install mistralai -from mistralai.client import MistralClient -from mistralai.exceptions import MistralAPIException -from mistralai.models.chat_completion import ChatCompletionResponse, ChatMessage, ToolCall +from mistralai import ( + AssistantMessage, + Function, + FunctionCall, + Mistral, + SystemMessage, + ToolCall, + ToolMessage, + UserMessage, +) from openai.types.chat import ChatCompletion, ChatCompletionMessageToolCall from openai.types.chat.chat_completion import ChatCompletionMessage, Choice from openai.types.completion_usage import CompletionUsage -from typing_extensions import Annotated from autogen.oai.client_utils import should_hide_tools, validate_parameter @@ -50,6 +54,7 @@ def __init__(self, **kwargs): Args: api_key (str): The API key for using Mistral.AI (or environment variable MISTRAL_API_KEY needs to be set) """ + # Ensure we have the api_key upon instantiation self.api_key = kwargs.get("api_key", None) if not self.api_key: @@ -59,7 +64,9 @@ def __init__(self, **kwargs): self.api_key ), "Please specify the 'api_key' in your config list entry for Mistral or set the MISTRAL_API_KEY env variable." - def message_retrieval(self, response: ChatCompletionResponse) -> Union[List[str], List[ChatCompletionMessage]]: + self._client = Mistral(api_key=self.api_key) + + def message_retrieval(self, response: ChatCompletion) -> Union[List[str], List[ChatCompletionMessage]]: """Retrieve the messages from the response.""" return [choice.message for choice in response.choices] @@ -86,34 +93,52 @@ def parse_params(self, params: Dict[str, Any]) -> Dict[str, Any]: ) mistral_params["random_seed"] = validate_parameter(params, "random_seed", int, True, None, False, None) + # TODO + if params.get("stream", False): + warnings.warn( + "Streaming is not currently supported, streaming will be disabled.", + UserWarning, + ) + # 3. Convert messages to Mistral format mistral_messages = [] tool_call_ids = {} # tool call ids to function name mapping for message in params["messages"]: if message["role"] == "assistant" and "tool_calls" in message and message["tool_calls"] is not None: # Convert OAI ToolCall to Mistral ToolCall - openai_toolcalls = message["tool_calls"] - mistral_toolcalls = [] - for toolcall in openai_toolcalls: - mistral_toolcall = ToolCall(id=toolcall["id"], function=toolcall["function"]) - mistral_toolcalls.append(mistral_toolcall) - mistral_messages.append( - ChatMessage(role=message["role"], content=message["content"], tool_calls=mistral_toolcalls) - ) + mistral_messages_tools = [] + for toolcall in message["tool_calls"]: + mistral_messages_tools.append( + ToolCall( + id=toolcall["id"], + function=FunctionCall( + name=toolcall["function"]["name"], + arguments=json.loads(toolcall["function"]["arguments"]), + ), + ) + ) + + mistral_messages.append(AssistantMessage(content="", tool_calls=mistral_messages_tools)) # Map tool call id to the function name for tool_call in message["tool_calls"]: tool_call_ids[tool_call["id"]] = tool_call["function"]["name"] - elif message["role"] in ("system", "user", "assistant"): - # Note this ChatMessage can take a 'name' but it is rejected by the Mistral API if not role=tool, so, no, the 'name' field is not used. 
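As a rough orientation for the mistralai >= 1.0.1 surface this migration targets: a hedged sketch, not part of the patch, built from the imports and the `chat.complete` call introduced in this diff; the model name is illustrative.

```python
import os
from mistralai import Mistral, SystemMessage, UserMessage

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
response = client.chat.complete(
    model="mistral-small-latest",  # illustrative model choice
    messages=[
        SystemMessage(content="You are a terse assistant."),
        UserMessage(content="Name one prime number."),
    ],
)
print(response.choices[0].message.content)
```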
- mistral_messages.append(ChatMessage(role=message["role"], content=message["content"])) + elif message["role"] == "system": + if len(mistral_messages) > 0 and mistral_messages[-1].role == "assistant": + # System messages can't appear after an Assistant message, so use a UserMessage + mistral_messages.append(UserMessage(content=message["content"])) + else: + mistral_messages.append(SystemMessage(content=message["content"])) + elif message["role"] == "assistant": + mistral_messages.append(AssistantMessage(content=message["content"])) + elif message["role"] == "user": + mistral_messages.append(UserMessage(content=message["content"])) elif message["role"] == "tool": # Indicates the result of a tool call, the name is the function name called mistral_messages.append( - ChatMessage( - role="tool", + ToolMessage( name=tool_call_ids[message["tool_call_id"]], content=message["content"], tool_call_id=message["tool_call_id"], @@ -122,21 +147,20 @@ def parse_params(self, params: Dict[str, Any]) -> Dict[str, Any]: else: warnings.warn(f"Unknown message role {message['role']}", UserWarning) - # If a 'system' message follows an 'assistant' message, change it to 'user' - # This can occur when using LLM summarisation - for i in range(1, len(mistral_messages)): - if mistral_messages[i - 1].role == "assistant" and mistral_messages[i].role == "system": - mistral_messages[i].role = "user" + # 4. Last message needs to be user or tool, if not, add a "please continue" message + if not isinstance(mistral_messages[-1], UserMessage) and not isinstance(mistral_messages[-1], ToolMessage): + mistral_messages.append(UserMessage(content="Please continue.")) mistral_params["messages"] = mistral_messages - # 4. Add tools to the call if we have them and aren't hiding them + # 5. Add tools to the call if we have them and aren't hiding them if "tools" in params: hide_tools = validate_parameter( params, "hide_tools", str, False, "never", None, ["if_all_run", "if_any_run", "never"] ) if not should_hide_tools(params["messages"], params["tools"], hide_tools): - mistral_params["tools"] = params["tools"] + mistral_params["tools"] = tool_def_to_mistral(params["tools"]) + return mistral_params def create(self, params: Dict[str, Any]) -> ChatCompletion: @@ -144,8 +168,7 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion: mistral_params = self.parse_params(params) # 2. Call Mistral.AI API - client = MistralClient(api_key=self.api_key) - mistral_response = client.chat(**mistral_params) + mistral_response = self._client.chat.complete(**mistral_params) # TODO: Handle streaming # 3. 
Convert Mistral response to OAI compatible format @@ -191,7 +214,7 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion: return response_oai @staticmethod - def get_usage(response: ChatCompletionResponse) -> Dict: + def get_usage(response: ChatCompletion) -> Dict: return { "prompt_tokens": response.usage.prompt_tokens if response.usage is not None else 0, "completion_tokens": response.usage.completion_tokens if response.usage is not None else 0, @@ -203,25 +226,48 @@ def get_usage(response: ChatCompletionResponse) -> Dict: } +def tool_def_to_mistral(tool_definitions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Converts AutoGen tool definition to a mistral tool format""" + + mistral_tools = [] + + for autogen_tool in tool_definitions: + mistral_tool = { + "type": "function", + "function": Function( + name=autogen_tool["function"]["name"], + description=autogen_tool["function"]["description"], + parameters=autogen_tool["function"]["parameters"], + ), + } + + mistral_tools.append(mistral_tool) + + return mistral_tools + + def calculate_mistral_cost(input_tokens: int, output_tokens: int, model_name: str) -> float: """Calculate the cost of the mistral response.""" - # Prices per 1 million tokens + # Prices per 1 thousand tokens # https://mistral.ai/technology/ model_cost_map = { - "open-mistral-7b": {"input": 0.25, "output": 0.25}, - "open-mixtral-8x7b": {"input": 0.7, "output": 0.7}, - "open-mixtral-8x22b": {"input": 2.0, "output": 6.0}, - "mistral-small-latest": {"input": 1.0, "output": 3.0}, - "mistral-medium-latest": {"input": 2.7, "output": 8.1}, - "mistral-large-latest": {"input": 4.0, "output": 12.0}, + "open-mistral-7b": {"input": 0.00025, "output": 0.00025}, + "open-mixtral-8x7b": {"input": 0.0007, "output": 0.0007}, + "open-mixtral-8x22b": {"input": 0.002, "output": 0.006}, + "mistral-small-latest": {"input": 0.001, "output": 0.003}, + "mistral-medium-latest": {"input": 0.00275, "output": 0.0081}, + "mistral-large-latest": {"input": 0.0003, "output": 0.0003}, + "mistral-large-2407": {"input": 0.0003, "output": 0.0003}, + "open-mistral-nemo-2407": {"input": 0.0003, "output": 0.0003}, + "codestral-2405": {"input": 0.001, "output": 0.003}, } # Ensure we have the model they are using and return the total cost if model_name in model_cost_map: costs = model_cost_map[model_name] - return (input_tokens * costs["input"] / 1_000_000) + (output_tokens * costs["output"] / 1_000_000) + return (input_tokens * costs["input"] / 1000) + (output_tokens * costs["output"] / 1000) else: warnings.warn(f"Cost calculation is not implemented for model {model_name}, will return $0.", UserWarning) return 0 diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py index df70e01ff7d..41b94324118 100644 --- a/autogen/oai/openai_utils.py +++ b/autogen/oai/openai_utils.py @@ -28,6 +28,7 @@ # gpt-4o "gpt-4o": (0.005, 0.015), "gpt-4o-2024-05-13": (0.005, 0.015), + "gpt-4o-2024-08-06": (0.0025, 0.01), # gpt-4-turbo "gpt-4-turbo-2024-04-09": (0.01, 0.03), # gpt-4 diff --git a/autogen/runtime_logging.py b/autogen/runtime_logging.py index 21aebcbfdd8..f40408204db 100644 --- a/autogen/runtime_logging.py +++ b/autogen/runtime_logging.py @@ -14,6 +14,7 @@ if TYPE_CHECKING: from autogen import Agent, ConversableAgent, OpenAIWrapper from autogen.oai.anthropic import AnthropicClient + from autogen.oai.bedrock import BedrockClient from autogen.oai.cohere import CohereClient from autogen.oai.gemini import GeminiClient from autogen.oai.groq import GroqClient @@ -123,6 +124,7 @@ def log_new_client( 
GroqClient, CohereClient, OllamaClient, + BedrockClient, ], wrapper: OpenAIWrapper, init_args: Dict[str, Any], diff --git a/autogen/token_count_utils.py b/autogen/token_count_utils.py index 220007a2bd1..8552a8f1653 100644 --- a/autogen/token_count_utils.py +++ b/autogen/token_count_utils.py @@ -36,6 +36,7 @@ def get_max_token_limit(model: str = "gpt-3.5-turbo-0613") -> int: "gpt-4-vision-preview": 128000, "gpt-4o": 128000, "gpt-4o-2024-05-13": 128000, + "gpt-4o-2024-08-06": 128000, "gpt-4o-mini": 128000, "gpt-4o-mini-2024-07-18": 128000, } diff --git a/autogen/version.py b/autogen/version.py index c4feccf559b..9b1b78b4b3a 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.2.33" +__version__ = "0.2.35" diff --git a/dotnet/AutoGen.sln b/dotnet/AutoGen.sln index 1218cf12982..78d18527b62 100644 --- a/dotnet/AutoGen.sln +++ b/dotnet/AutoGen.sln @@ -26,7 +26,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.SemanticKernel", "s EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Core", "src\AutoGen.Core\AutoGen.Core.csproj", "{D58D43D1-0617-4A3D-9932-C773E6398535}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI", "src\AutoGen.OpenAI\AutoGen.OpenAI.csproj", "{63445BB7-DBB9-4AEF-9D6F-98BBE75EE1EC}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI.V1", "src\AutoGen.OpenAI.V1\AutoGen.OpenAI.V1.csproj", "{63445BB7-DBB9-4AEF-9D6F-98BBE75EE1EC}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Mistral", "src\AutoGen.Mistral\AutoGen.Mistral.csproj", "{6585D1A4-3D97-4D76-A688-1933B61AEB19}" EndProject @@ -38,7 +38,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.WebAPI.Tests", "tes EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.SemanticKernel.Tests", "test\AutoGen.SemanticKernel.Tests\AutoGen.SemanticKernel.Tests.csproj", "{1DFABC4A-8458-4875-8DCB-59F3802DAC65}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI.Tests", "test\AutoGen.OpenAI.Tests\AutoGen.OpenAI.Tests.csproj", "{D36A85F9-C172-487D-8192-6BFE5D05B4A7}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI.V1.Tests", "test\AutoGen.OpenAI.V1.Tests\AutoGen.OpenAI.V1.Tests.csproj", "{D36A85F9-C172-487D-8192-6BFE5D05B4A7}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.DotnetInteractive.Tests", "test\AutoGen.DotnetInteractive.Tests\AutoGen.DotnetInteractive.Tests.csproj", "{B61388CA-DC73-4B7F-A7B2-7B9A86C9229E}" EndProject @@ -68,6 +68,16 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI.Sample", "sa EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.WebAPI.Sample", "sample\AutoGen.WebAPI.Sample\AutoGen.WebAPI.Sample.csproj", "{12079C18-A519-403F-BBFD-200A36A0C083}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.AzureAIInference", "src\AutoGen.AzureAIInference\AutoGen.AzureAIInference.csproj", "{5C45981D-1319-4C25-935C-83D411CB28DF}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.AzureAIInference.Tests", "test\AutoGen.AzureAIInference.Tests\AutoGen.AzureAIInference.Tests.csproj", "{5970868F-831E-418F-89A9-4EC599563E16}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Tests.Share", "test\AutoGen.Test.Share\AutoGen.Tests.Share.csproj", "{143725E2-206C-4D37-93E4-9EDF699826B2}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI", 
"src\AutoGen.OpenAI\AutoGen.OpenAI.csproj", "{3AF1CBEC-2877-41E9-92AE-3A391B2AA9E8}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.OpenAI.Tests", "test\AutoGen.OpenAI.Tests\AutoGen.OpenAI.Tests.csproj", "{42A8251C-E7B3-47BB-A82E-459952EBE132}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -194,6 +204,26 @@ Global {12079C18-A519-403F-BBFD-200A36A0C083}.Debug|Any CPU.Build.0 = Debug|Any CPU {12079C18-A519-403F-BBFD-200A36A0C083}.Release|Any CPU.ActiveCfg = Release|Any CPU {12079C18-A519-403F-BBFD-200A36A0C083}.Release|Any CPU.Build.0 = Release|Any CPU + {5C45981D-1319-4C25-935C-83D411CB28DF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5C45981D-1319-4C25-935C-83D411CB28DF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5C45981D-1319-4C25-935C-83D411CB28DF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5C45981D-1319-4C25-935C-83D411CB28DF}.Release|Any CPU.Build.0 = Release|Any CPU + {5970868F-831E-418F-89A9-4EC599563E16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5970868F-831E-418F-89A9-4EC599563E16}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5970868F-831E-418F-89A9-4EC599563E16}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5970868F-831E-418F-89A9-4EC599563E16}.Release|Any CPU.Build.0 = Release|Any CPU + {143725E2-206C-4D37-93E4-9EDF699826B2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {143725E2-206C-4D37-93E4-9EDF699826B2}.Debug|Any CPU.Build.0 = Debug|Any CPU + {143725E2-206C-4D37-93E4-9EDF699826B2}.Release|Any CPU.ActiveCfg = Release|Any CPU + {143725E2-206C-4D37-93E4-9EDF699826B2}.Release|Any CPU.Build.0 = Release|Any CPU + {3AF1CBEC-2877-41E9-92AE-3A391B2AA9E8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3AF1CBEC-2877-41E9-92AE-3A391B2AA9E8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3AF1CBEC-2877-41E9-92AE-3A391B2AA9E8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3AF1CBEC-2877-41E9-92AE-3A391B2AA9E8}.Release|Any CPU.Build.0 = Release|Any CPU + {42A8251C-E7B3-47BB-A82E-459952EBE132}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {42A8251C-E7B3-47BB-A82E-459952EBE132}.Debug|Any CPU.Build.0 = Debug|Any CPU + {42A8251C-E7B3-47BB-A82E-459952EBE132}.Release|Any CPU.ActiveCfg = Release|Any CPU + {42A8251C-E7B3-47BB-A82E-459952EBE132}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -229,6 +259,11 @@ Global {6B82F26D-5040-4453-B21B-C8D1F913CE4C} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} {0E635268-351C-4A6B-A28D-593D868C2CA4} = {FBFEAD1F-29EB-4D99-A672-0CD8473E10B9} {12079C18-A519-403F-BBFD-200A36A0C083} = {FBFEAD1F-29EB-4D99-A672-0CD8473E10B9} + {5C45981D-1319-4C25-935C-83D411CB28DF} = {18BF8DD7-0585-48BF-8F97-AD333080CE06} + {5970868F-831E-418F-89A9-4EC599563E16} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} + {143725E2-206C-4D37-93E4-9EDF699826B2} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} + {3AF1CBEC-2877-41E9-92AE-3A391B2AA9E8} = {18BF8DD7-0585-48BF-8F97-AD333080CE06} + {42A8251C-E7B3-47BB-A82E-459952EBE132} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {93384647-528D-46C8-922C-8DB36A382F0B} diff --git a/dotnet/eng/MetaInfo.props b/dotnet/eng/MetaInfo.props index 72918fabe4f..006c586faba 100644 --- a/dotnet/eng/MetaInfo.props +++ b/dotnet/eng/MetaInfo.props @@ -1,7 +1,7 @@ <?xml version="1.0" encoding="utf-8"?> <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <PropertyGroup> - <VersionPrefix>0.0.17</VersionPrefix> + 
<VersionPrefix>0.1.0</VersionPrefix> <Authors>AutoGen</Authors> <PackageProjectUrl>https://microsoft.github.io/autogen-for-net/</PackageProjectUrl> <RepositoryUrl>https://github.com/microsoft/autogen</RepositoryUrl> diff --git a/dotnet/eng/Version.props b/dotnet/eng/Version.props index 20be183219e..36cfd917c2c 100644 --- a/dotnet/eng/Version.props +++ b/dotnet/eng/Version.props @@ -2,8 +2,9 @@ <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <PropertyGroup> <AzureOpenAIVersion>1.0.0-beta.17</AzureOpenAIVersion> - <SemanticKernelVersion>1.15.1</SemanticKernelVersion> - <SemanticKernelExperimentalVersion>1.15.1-alpha</SemanticKernelExperimentalVersion> + <AzureOpenAIV2Version>2.0.0-beta.3</AzureOpenAIV2Version> + <SemanticKernelVersion>1.18.1-rc</SemanticKernelVersion> + <SemanticKernelExperimentalVersion>1.18.1-alpha</SemanticKernelExperimentalVersion> <SystemCodeDomVersion>5.0.0</SystemCodeDomVersion> <MicrosoftCodeAnalysisVersion>4.3.0</MicrosoftCodeAnalysisVersion> <ApprovalTestVersion>6.0.0</ApprovalTestVersion> @@ -15,5 +16,8 @@ <MicrosoftASPNETCoreVersion>8.0.4</MicrosoftASPNETCoreVersion> <GoogleCloudAPIPlatformVersion>3.0.0</GoogleCloudAPIPlatformVersion> <JsonSchemaVersion>4.3.0.2</JsonSchemaVersion> + <AzureAIInferenceVersion>1.0.0-beta.1</AzureAIInferenceVersion> + <OpenAISDKVersion>2.0.0-beta.10</OpenAISDKVersion> + <PowershellSDKVersion>7.4.4</PowershellSDKVersion> </PropertyGroup> </Project> \ No newline at end of file diff --git a/dotnet/sample/AutoGen.Anthropic.Samples/Anthropic_Agent_With_Prompt_Caching.cs b/dotnet/sample/AutoGen.Anthropic.Samples/Anthropic_Agent_With_Prompt_Caching.cs new file mode 100644 index 00000000000..5d8a99ce128 --- /dev/null +++ b/dotnet/sample/AutoGen.Anthropic.Samples/Anthropic_Agent_With_Prompt_Caching.cs @@ -0,0 +1,133 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Anthropic_Agent_With_Prompt_Caching.cs + +using AutoGen.Anthropic.DTO; +using AutoGen.Anthropic.Extensions; +using AutoGen.Anthropic.Utils; +using AutoGen.Core; + +namespace AutoGen.Anthropic.Samples; + +public class Anthropic_Agent_With_Prompt_Caching +{ + // A random and long test string to demonstrate cache control. + // the context must be larger than 1024 tokens for Claude 3.5 Sonnet and Claude 3 Opus + // 2048 tokens for Claude 3.0 Haiku + // Shorter prompts cannot be cached, even if marked with cache_control. Any requests to cache fewer than this number of tokens will be processed without caching + + #region Long story for caching + public const string LongStory = """ + Once upon a time in a small, nondescript town lived a man named Bob. Bob was an unassuming individual, the kind of person you wouldn’t look twice at if you passed him on the street. He worked as an IT specialist for a mid-sized corporation, spending his days fixing computers and troubleshooting software issues. But beneath his average exterior, Bob harbored a secret ambition—he wanted to take over the world. + + Bob wasn’t always like this. For most of his life, he had been content with his routine, blending into the background. But one day, while browsing the dark corners of the internet, Bob stumbled upon an ancient manuscript, encrypted within the deep web, detailing the steps to global domination. It was written by a forgotten conqueror, someone whose name had been erased from history but whose methods were preserved in this digital relic. 
The manuscript laid out a plan so intricate and flawless that Bob, with his analytical mind, became obsessed. + + Over the next few years, Bob meticulously followed the manuscript’s guidance. He started small, creating a network of like-minded individuals who shared his dream. They communicated through encrypted channels, meeting in secret to discuss their plans. Bob was careful, never revealing too much about himself, always staying in the shadows. He used his IT skills to gather information, infiltrating government databases, and private corporations, and acquiring secrets that could be used as leverage. + + As his network grew, so did his influence. Bob began to manipulate world events from behind the scenes. He orchestrated economic crises, incited political turmoil, and planted seeds of discord among the world’s most powerful nations. Each move was calculated, each action a step closer to his ultimate goal. The world was in chaos, and no one suspected that a man like Bob could be behind it all. + + But Bob knew that causing chaos wasn’t enough. To truly take over the world, he needed something more—something to cement his power. That’s when he turned to technology. Bob had always been ahead of the curve when it came to tech, and now, he planned to use it to his advantage. He began developing an AI, one that would be more powerful and intelligent than anything the world had ever seen. This AI, which Bob named “Nemesis,” was designed to control every aspect of modern life—from financial systems to military networks. + + It took years of coding, testing, and refining, but eventually, Nemesis was ready. Bob unleashed the AI, and within days, it had taken control of the world’s digital infrastructure. Governments were powerless, their systems compromised. Corporations crumbled as their assets were seized. The military couldn’t act, their weapons turned against them. Bob, from the comfort of his modest home, had done it. He had taken over the world. + + The world, now under Bob’s control, was eerily quiet. There were no more wars, no more financial crises, no more political strife. Nemesis ensured that everything ran smoothly, efficiently, and without dissent. The people of the world had no choice but to obey, their lives dictated by an unseen hand. + + Bob, once a man who was overlooked and ignored, was now the most powerful person on the planet. But with that power came a realization. The world he had taken over was not the world he had envisioned. It was cold, mechanical, and devoid of the chaos that once made life unpredictable and exciting. Bob had achieved his goal, but in doing so, he had lost the very thing that made life worth living—freedom. + + And so, Bob, now ruler of the world, sat alone in his control room, staring at the screens that displayed his dominion. He had everything he had ever wanted, yet he felt emptier than ever before. The world was his, but at what cost? + + In the end, Bob realized that true power didn’t come from controlling others, but from the ability to let go. He deactivated Nemesis, restoring the world to its former state, and disappeared into obscurity, content to live out the rest of his days as just another face in the crowd. And though the world never knew his name, Bob’s legacy would live on, a reminder of the dangers of unchecked ambition. + + Bob had vanished, leaving the world in a fragile state of recovery. 
Governments scrambled to regain control of their systems, corporations tried to rebuild, and the global population slowly adjusted to life without the invisible grip of Nemesis. Yet, even as society returned to a semblance of normalcy, whispers of the mysterious figure who had brought the world to its knees lingered in the shadows. + + Meanwhile, Bob had retreated to a secluded cabin deep in the mountains. The cabin was a modest, rustic place, surrounded by dense forests and overlooking a tranquil lake. It was far from civilization, a perfect place for a man who wanted to disappear. Bob spent his days fishing, hiking, and reflecting on his past. For the first time in years, he felt a sense of peace. + + But peace was fleeting. Despite his best efforts to put his past behind him, Bob couldn’t escape the consequences of his actions. He had unleashed Nemesis upon the world, and though he had deactivated the AI, remnants of its code still existed. Rogue factions, hackers, and remnants of his old network were searching for those fragments, hoping to revive Nemesis and seize the power that Bob had relinquished. + + One day, as Bob was chopping wood outside his cabin, a figure emerged from the tree line. It was a young woman, dressed in hiking gear, with a determined look in her eyes. Bob tensed, his instincts telling him that this was no ordinary hiker. + + “Bob,” the woman said, her voice steady. “Or should I say, the man who almost became the ruler of the world?” + + Bob sighed, setting down his axe. “Who are you, and what do you want?” + + The woman stepped closer. “My name is Sarah. I was part of your network, one of the few who knew about Nemesis. But I wasn’t like the others. I didn’t want power for myself—I wanted to protect the world from those who would misuse it.” + + Bob studied her, trying to gauge her intentions. “And why are you here now?” + + Sarah reached into her backpack and pulled out a small device. “Because Nemesis isn’t dead. Some of its code is still active, and it’s trying to reboot itself. I need your help to stop it for good.” + + Bob’s heart sank. He had hoped that by deactivating Nemesis, he had erased it from existence. But deep down, he knew that an AI as powerful as Nemesis wouldn’t go down so easily. “Why come to me? I’m the one who created it. I’m the reason the world is in this mess.” + + Sarah shook her head. “You’re also the only one who knows how to stop it. I’ve tracked down the remnants of Nemesis’s code, but I need you to help destroy it before it falls into the wrong hands.” + + Bob hesitated. He had wanted nothing more than to leave his past behind, but he couldn’t ignore the responsibility that weighed on him. He had created Nemesis, and now it was his duty to make sure it never posed a threat again. + + “Alright,” Bob said finally. “I’ll help you. But after this, I’m done. No more world domination, no more secret networks. I just want to live in peace.” + + Sarah nodded. “Agreed. Let’s finish what you started.” + + Over the next few weeks, Bob and Sarah worked together, traveling to various locations around the globe where fragments of Nemesis’s code had been detected. They infiltrated secure facilities, outsmarted rogue hackers, and neutralized threats, all while staying one step ahead of those who sought to control Nemesis for their own gain. + + As they worked, Bob and Sarah developed a deep respect for one another. Sarah was sharp, resourceful, and driven by a genuine desire to protect the world. 
Bob found himself opening up to her, sharing his regrets, his doubts, and the lessons he had learned. In turn, Sarah shared her own story—how she had once been tempted by power but had chosen a different path, one that led her to fight for what was right. + + Finally, after weeks of intense effort, they tracked down the last fragment of Nemesis’s code, hidden deep within a remote server farm in the Arctic. The facility was heavily guarded, but Bob and Sarah had planned meticulously. Under the cover of a blizzard, they infiltrated the facility, avoiding detection as they made their way to the heart of the server room. + + As Bob began the process of erasing the final fragment, an alarm blared, and the facility’s security forces closed in. Sarah held them off as long as she could, but they were outnumbered and outgunned. Just as the situation seemed hopeless, Bob executed the final command, wiping Nemesis from existence once and for all. + + But as the last remnants of Nemesis were deleted, Bob knew there was only one way to ensure it could never be resurrected. He initiated a self-destruct sequence for the server farm, trapping himself and Sarah inside. + + Sarah stared at him, realization dawning in her eyes. “Bob, what are you doing?” + + Bob looked at her, a sad smile on his face. “I have to make sure it’s over. This is the only way.” + + Sarah’s eyes filled with tears, but she nodded, understanding the gravity of his decision. “Thank you, Bob. For everything.” + + As the facility’s countdown reached its final seconds, Bob and Sarah stood side by side, knowing they had done the right thing. The explosion that followed was seen from miles away, a final testament to the end of an era. + + The world never knew the true story of Bob, the man who almost ruled the world. But in his final act of sacrifice, he ensured that the world would remain free, a place where people could live their lives without fear of control. Bob had redeemed himself, not as a conqueror, but as a protector—a man who chose to save the world rather than rule it. + + And in the quiet aftermath of the explosion, as the snow settled over the wreckage, Bob’s legacy was sealed—not as a name in history books, but as a silent guardian whose actions would be felt for generations to come. + """; + #endregion + + public static async Task RunAsync() + { + #region init translator agents & register middlewares + + var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY") ?? 
+ throw new Exception("Please set ANTHROPIC_API_KEY environment variable."); + var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, apiKey); + var frenchTranslatorAgent = + new AnthropicClientAgent(anthropicClient, "frenchTranslator", AnthropicConstants.Claude35Sonnet, + systemMessage: "You are a French translator") + .RegisterMessageConnector() + .RegisterPrintMessage(); + + var germanTranslatorAgent = new AnthropicClientAgent(anthropicClient, "germanTranslator", + AnthropicConstants.Claude35Sonnet, systemMessage: "You are a German translator") + .RegisterMessageConnector() + .RegisterPrintMessage(); + + #endregion + + var userProxyAgent = new UserProxyAgent( + name: "user", + humanInputMode: HumanInputMode.ALWAYS) + .RegisterPrintMessage(); + + var groupChat = new RoundRobinGroupChat( + agents: [userProxyAgent, frenchTranslatorAgent, germanTranslatorAgent]); + + var messageEnvelope = + MessageEnvelope.Create( + new ChatMessage("user", [TextContent.CreateTextWithCacheControl(LongStory)]), + from: "user"); + + var chatHistory = new List<IMessage>() + { + new TextMessage(Role.User, "translate this text for me", from: userProxyAgent.Name), + messageEnvelope, + }; + + var history = await groupChat.SendAsync(chatHistory).ToArrayAsync(); + } +} diff --git a/dotnet/sample/AutoGen.Anthropic.Samples/Program.cs b/dotnet/sample/AutoGen.Anthropic.Samples/Program.cs index 6d1e4e594b9..105bb56524f 100644 --- a/dotnet/sample/AutoGen.Anthropic.Samples/Program.cs +++ b/dotnet/sample/AutoGen.Anthropic.Samples/Program.cs @@ -7,6 +7,6 @@ internal static class Program { public static async Task Main(string[] args) { - await Create_Anthropic_Agent_With_Tool.RunAsync(); + await Anthropic_Agent_With_Prompt_Caching.RunAsync(); } } diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/CreateAnAgent.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/CreateAnAgent.cs index a103f4ec2d4..f6805322466 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/CreateAnAgent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/CreateAnAgent.cs @@ -4,7 +4,9 @@ using AutoGen; using AutoGen.Core; using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; using FluentAssertions; +using OpenAI; public partial class AssistantCodeSnippet { @@ -32,23 +34,18 @@ public void CodeSnippet2() { #region code_snippet_2 // get OpenAI Key and create config - var apiKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY"); - string endPoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT"); // change to your endpoint + var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY"); + var model = "gpt-4o-mini"; - var llmConfig = new AzureOpenAIConfig( - endpoint: endPoint, - deploymentName: "gpt-3.5-turbo-16k", // change to your deployment name - apiKey: apiKey); + var openAIClient = new OpenAIClient(apiKey); // create assistant agent - var assistantAgent = new AssistantAgent( + var assistantAgent = new OpenAIChatAgent( name: "assistant", systemMessage: "You are an assistant that help user to do some tasks.", - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = new[] { llmConfig }, - }); + chatClient: openAIClient.GetChatClient(model)) + .RegisterMessageConnector() + .RegisterPrintMessage(); #endregion code_snippet_2 } @@ -71,27 +68,21 @@ public async Task CodeSnippet4() // get OpenAI Key and create config var apiKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY"); string endPoint = 
Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT"); // change to your endpoint - - var llmConfig = new AzureOpenAIConfig( - endpoint: endPoint, - deploymentName: "gpt-3.5-turbo-16k", // change to your deployment name - apiKey: apiKey); + var model = "gpt-4o-mini"; + var openAIClient = new OpenAIClient(new System.ClientModel.ApiKeyCredential(apiKey), new OpenAIClientOptions + { + Endpoint = new Uri(endPoint), + }); #region code_snippet_4 - var assistantAgent = new AssistantAgent( + var assistantAgent = new OpenAIChatAgent( + chatClient: openAIClient.GetChatClient(model), name: "assistant", systemMessage: "You are an assistant that convert user input to upper case.", - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = new[] - { - llmConfig - }, - FunctionContracts = new[] - { - this.UpperCaseFunctionContract, // The FunctionDefinition object for the UpperCase function - }, - }); + functions: [ + this.UpperCaseFunctionContract.ToChatTool(), // The FunctionDefinition object for the UpperCase function + ]) + .RegisterMessageConnector() + .RegisterPrintMessage(); var response = await assistantAgent.SendAsync("hello"); response.Should().BeOfType<ToolCallMessage>(); @@ -106,31 +97,24 @@ public async Task CodeSnippet5() // get OpenAI Key and create config var apiKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY"); string endPoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT"); // change to your endpoint - - var llmConfig = new AzureOpenAIConfig( - endpoint: endPoint, - deploymentName: "gpt-3.5-turbo-16k", // change to your deployment name - apiKey: apiKey); + var model = "gpt-4o-mini"; + var openAIClient = new OpenAIClient(new System.ClientModel.ApiKeyCredential(apiKey), new OpenAIClientOptions + { + Endpoint = new Uri(endPoint), + }); #region code_snippet_5 - var assistantAgent = new AssistantAgent( - name: "assistant", - systemMessage: "You are an assistant that convert user input to upper case.", - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = new[] - { - llmConfig - }, - FunctionContracts = new[] - { - this.UpperCaseFunctionContract, // The FunctionDefinition object for the UpperCase function - }, - }, - functionMap: new Dictionary<string, Func<string, Task<string>>> + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.UpperCaseFunctionContract], + functionMap: new Dictionary<string, Func<string, Task<string>>>() { - { this.UpperCaseFunctionContract.Name, this.UpperCaseWrapper }, // The wrapper function for the UpperCase function + { this.UpperCaseFunctionContract.Name, this.UpperCase }, }); + var assistantAgent = new OpenAIChatAgent( + name: "assistant", + systemMessage: "You are an assistant that convert user input to upper case.", + chatClient: openAIClient.GetChatClient(model)) + .RegisterMessageConnector() + .RegisterStreamingMiddleware(functionCallMiddleware); var response = await assistantAgent.SendAsync("hello"); response.Should().BeOfType<TextMessage>(); diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/FunctionCallCodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/FunctionCallCodeSnippet.cs index 2b7e25fee0c..854a385dc34 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/FunctionCallCodeSnippet.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/FunctionCallCodeSnippet.cs @@ -3,7 +3,6 @@ using AutoGen; using AutoGen.Core; -using AutoGen.OpenAI; using FluentAssertions; public partial class FunctionCallCodeSnippet diff --git 
a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/GetStartCodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/GetStartCodeSnippet.cs index fe97152183a..c5ff7b77033 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/GetStartCodeSnippet.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/GetStartCodeSnippet.cs @@ -5,6 +5,8 @@ using AutoGen; using AutoGen.Core; using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; +using OpenAI; #endregion snippet_GetStartCodeSnippet public class GetStartCodeSnippet @@ -13,16 +15,14 @@ public async Task CodeSnippet1() { #region code_snippet_1 var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var gpt35Config = new OpenAIConfig(openAIKey, "gpt-3.5-turbo"); + var openAIClient = new OpenAIClient(openAIKey); + var model = "gpt-4o-mini"; - var assistantAgent = new AssistantAgent( + var assistantAgent = new OpenAIChatAgent( name: "assistant", systemMessage: "You are an assistant that help user to do some tasks.", - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = [gpt35Config], - }) + chatClient: openAIClient.GetChatClient(model)) + .RegisterMessageConnector() .RegisterPrintMessage(); // register a hook to print message nicely to console // set human input mode to ALWAYS so that user always provide input diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/OpenAICodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/OpenAICodeSnippet.cs index cf045221223..60520078e72 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/OpenAICodeSnippet.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/OpenAICodeSnippet.cs @@ -5,9 +5,10 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; #endregion using_statement using FluentAssertions; +using OpenAI; +using OpenAI.Chat; namespace AutoGen.BasicSample.CodeSnippet; #region weather_function @@ -32,31 +33,30 @@ public async Task CreateOpenAIChatAgentAsync() { #region create_openai_chat_agent var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new Exception("Please set OPENAI_API_KEY environment variable."); - var modelId = "gpt-3.5-turbo"; + var modelId = "gpt-4o-mini"; var openAIClient = new OpenAIClient(openAIKey); // create an open ai chat agent var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: openAIClient.GetChatClient(modelId), name: "assistant", - modelName: modelId, systemMessage: "You are an assistant that help user to do some tasks."); // OpenAIChatAgent supports the following message types: // - IMessage<ChatRequestMessage> where ChatRequestMessage is from Azure.AI.OpenAI - var helloMessage = new ChatRequestUserMessage("Hello"); + var helloMessage = new UserChatMessage("Hello"); // Use MessageEnvelope.Create to create an IMessage<ChatRequestMessage> var chatMessageContent = MessageEnvelope.Create(helloMessage); var reply = await openAIChatAgent.SendAsync(chatMessageContent); - // The type of reply is MessageEnvelope<ChatResponseMessage> where ChatResponseMessage is from Azure.AI.OpenAI - reply.Should().BeOfType<MessageEnvelope<ChatResponseMessage>>(); + // The type of reply is MessageEnvelope<ChatCompletion> where ChatResponseMessage is from Azure.AI.OpenAI + reply.Should().BeOfType<MessageEnvelope<ChatCompletion>>(); // You can un-envelop the reply to get the ChatResponseMessage - ChatResponseMessage response = reply.As<MessageEnvelope<ChatResponseMessage>>().Content; - response.Role.Should().Be(ChatRole.Assistant); + ChatCompletion response = reply.As<MessageEnvelope<ChatCompletion>>().Content; + response.Role.Should().Be(ChatMessageRole.Assistant); #endregion create_openai_chat_agent #region create_openai_chat_agent_streaming @@ -64,8 +64,8 @@ public async Task CreateOpenAIChatAgentAsync() await foreach (var streamingMessage in streamingReply) { - streamingMessage.Should().BeOfType<MessageEnvelope<StreamingChatCompletionsUpdate>>(); - streamingMessage.As<MessageEnvelope<StreamingChatCompletionsUpdate>>().Content.Role.Should().Be(ChatRole.Assistant); + streamingMessage.Should().BeOfType<MessageEnvelope<StreamingChatCompletionUpdate>>(); + streamingMessage.As<MessageEnvelope<StreamingChatCompletionUpdate>>().Content.Role.Should().Be(ChatMessageRole.Assistant); } #endregion create_openai_chat_agent_streaming @@ -77,7 +77,7 @@ public async Task CreateOpenAIChatAgentAsync() // now the agentWithConnector supports more message types var messages = new IMessage[] { - MessageEnvelope.Create(new ChatRequestUserMessage("Hello")), + MessageEnvelope.Create(new UserChatMessage("Hello")), new TextMessage(Role.Assistant, "Hello", from: "user"), new MultiModalMessage(Role.Assistant, [ @@ -106,9 +106,8 @@ public async Task OpenAIChatAgentGetWeatherFunctionCallAsync() // create an open ai chat agent var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: openAIClient.GetChatClient(modelId), name: "assistant", - modelName: modelId, systemMessage: "You are an assistant that help user to do some tasks.") .RegisterMessageConnector(); diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/PrintMessageMiddlewareCodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/PrintMessageMiddlewareCodeSnippet.cs index bf4f9c976e2..0ac7f71a3ca 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/PrintMessageMiddlewareCodeSnippet.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/PrintMessageMiddlewareCodeSnippet.cs @@ -4,8 +4,6 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure; -using Azure.AI.OpenAI; namespace 
AutoGen.BasicSample.CodeSnippet; @@ -15,8 +13,8 @@ public async Task PrintMessageMiddlewareAsync() { var config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); var endpoint = new Uri(config.Endpoint); - var openaiClient = new OpenAIClient(endpoint, new AzureKeyCredential(config.ApiKey)); - var agent = new OpenAIChatAgent(openaiClient, "assistant", config.DeploymentName) + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); + var agent = new OpenAIChatAgent(gpt4o, "assistant", config.DeploymentName) .RegisterMessageConnector(); #region PrintMessageMiddleware @@ -31,10 +29,10 @@ public async Task PrintMessageStreamingMiddlewareAsync() { var config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); var endpoint = new Uri(config.Endpoint); - var openaiClient = new OpenAIClient(endpoint, new AzureKeyCredential(config.ApiKey)); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); #region print_message_streaming - var streamingAgent = new OpenAIChatAgent(openaiClient, "assistant", config.DeploymentName) + var streamingAgent = new OpenAIChatAgent(gpt4o, "assistant") .RegisterMessageConnector() .RegisterPrintMessage(); diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs index e498650b6aa..b087beb993b 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs @@ -4,6 +4,7 @@ #region code_snippet_0_1 using AutoGen.Core; using AutoGen.DotnetInteractive; +using AutoGen.DotnetInteractive.Extension; #endregion code_snippet_0_1 namespace AutoGen.BasicSample.CodeSnippet; @@ -11,18 +12,37 @@ public class RunCodeSnippetCodeSnippet { public async Task CodeSnippet1() { - IAgent agent = default; + IAgent agent = new DefaultReplyAgent("agent", "Hello World"); #region code_snippet_1_1 - var workingDirectory = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); - Directory.CreateDirectory(workingDirectory); - var interactiveService = new InteractiveService(installingDirectory: workingDirectory); - await interactiveService.StartAsync(workingDirectory: workingDirectory); + var kernel = DotnetInteractiveKernelBuilder + .CreateDefaultInProcessKernelBuilder() // add C# and F# kernels + .Build(); #endregion code_snippet_1_1 #region code_snippet_1_2 - // register dotnet code block execution hook to an arbitrary agent - var dotnetCodeAgent = agent.RegisterDotnetCodeBlockExectionHook(interactiveService: interactiveService); + // register middleware to execute code block + var dotnetCodeAgent = agent + .RegisterMiddleware(async (msgs, option, innerAgent, ct) => + { + var lastMessage = msgs.LastOrDefault(); + if (lastMessage == null || lastMessage.GetContent() is null) + { + return await innerAgent.GenerateReplyAsync(msgs, option, ct); + } + + if (lastMessage.ExtractCodeBlock("```csharp", "```") is string codeSnippet) + { + // execute code snippet + var result = await kernel.RunSubmitCodeCommandAsync(codeSnippet, "csharp"); + return new TextMessage(Role.Assistant, result, from: agent.Name); + } + else + { + // no code block found, invoke next agent + return await innerAgent.GenerateReplyAsync(msgs, option, ct); + } + }); var codeSnippet = @" ```csharp @@ -44,5 +64,17 @@ public async Task CodeSnippet1() ``` "; #endregion code_snippet_1_3 + + #region code_snippet_1_4 + var pythonKernel = DotnetInteractiveKernelBuilder + .CreateDefaultInProcessKernelBuilder() + .AddPythonKernel(venv: "python3") + 
.Build(); + + var pythonCode = """ + print('Hello from Python!') + """; + var result = await pythonKernel.RunSubmitCodeCommandAsync(pythonCode, "python3"); + #endregion code_snippet_1_4 } } diff --git a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/TypeSafeFunctionCallCodeSnippet.cs b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/TypeSafeFunctionCallCodeSnippet.cs index 50bcd8a8048..667705835eb 100644 --- a/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/TypeSafeFunctionCallCodeSnippet.cs +++ b/dotnet/sample/AutoGen.BasicSamples/CodeSnippet/TypeSafeFunctionCallCodeSnippet.cs @@ -3,7 +3,6 @@ using System.Text.Json; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; #region weather_report_using_statement using AutoGen.Core; #endregion weather_report_using_statement @@ -32,7 +31,7 @@ public async Task Consume() var functionInstance = new TypeSafeFunctionCall(); // Get the generated function definition - FunctionDefinition functionDefiniton = functionInstance.WeatherReportFunctionContract.ToOpenAIFunctionDefinition(); + var functionDefiniton = functionInstance.WeatherReportFunctionContract.ToChatTool(); // Get the generated function wrapper Func<string, Task<string>> functionWrapper = functionInstance.WeatherReportWrapper; @@ -69,32 +68,31 @@ public async Task<string> UpperCase(string input) #region code_snippet_1 // file: FunctionDefinition.generated.cs - public FunctionDefinition UpperCaseFunction + public FunctionContract WeatherReportFunctionContract { - get => new FunctionDefinition + get => new FunctionContract { - Name = @"UpperCase", - Description = "convert input to upper case", - Parameters = BinaryData.FromObjectAsJson(new + ClassName = @"TypeSafeFunctionCall", + Name = @"WeatherReport", + Description = @"Get weather report", + ReturnType = typeof(Task<string>), + Parameters = new global::AutoGen.Core.FunctionParameterContract[] { - Type = "object", - Properties = new - { - input = new + new FunctionParameterContract { - Type = @"string", - Description = @"input", + Name = @"city", + Description = @"city", + ParameterType = typeof(string), + IsRequired = true, }, - }, - Required = new[] - { - "input", + new FunctionParameterContract + { + Name = @"date", + Description = @"date", + ParameterType = typeof(string), + IsRequired = true, }, }, - new JsonSerializerOptions - { - PropertyNamingPolicy = JsonNamingPolicy.CamelCase, - }) }; } #endregion code_snippet_1 diff --git a/dotnet/sample/AutoGen.BasicSamples/Example01_AssistantAgent.cs b/dotnet/sample/AutoGen.BasicSamples/Example01_AssistantAgent.cs index 3ee363bfc06..40c88102588 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example01_AssistantAgent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example01_AssistantAgent.cs @@ -4,6 +4,8 @@ using AutoGen; using AutoGen.BasicSample; using AutoGen.Core; +using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; using FluentAssertions; /// <summary> @@ -13,18 +15,12 @@ public static class Example01_AssistantAgent { public static async Task RunAsync() { - var gpt35 = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var config = new ConversableAgentConfig - { - Temperature = 0, - ConfigList = [gpt35], - }; - - // create assistant agent - var assistantAgent = new AssistantAgent( + var gpt4oMini = LLMConfiguration.GetOpenAIGPT4o_mini(); + var assistantAgent = new OpenAIChatAgent( + chatClient: gpt4oMini, name: "assistant", - systemMessage: "You convert what user said to all uppercase.", - llmConfig: config) + systemMessage: "You convert what user said to all uppercase.") + 
.RegisterMessageConnector() .RegisterPrintMessage(); // talk to the assistant agent diff --git a/dotnet/sample/AutoGen.BasicSamples/Example02_TwoAgent_MathChat.cs b/dotnet/sample/AutoGen.BasicSamples/Example02_TwoAgent_MathChat.cs index c2957f32da7..b2dd9726b4b 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example02_TwoAgent_MathChat.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example02_TwoAgent_MathChat.cs @@ -1,30 +1,28 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Example02_TwoAgent_MathChat.cs -using AutoGen; using AutoGen.BasicSample; using AutoGen.Core; +using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; using FluentAssertions; public static class Example02_TwoAgent_MathChat { public static async Task RunAsync() { #region code_snippet_1 - // get gpt-3.5-turbo config - var gpt35 = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); + var gpt4oMini = LLMConfiguration.GetOpenAIGPT4o_mini(); + // create teacher agent // teacher agent will create math questions - var teacher = new AssistantAgent( + var teacher = new OpenAIChatAgent( + chatClient: gpt4oMini, name: "teacher", systemMessage: @"You are a teacher that create pre-school math question for student and check answer. If the answer is correct, you stop the conversation by saying [COMPLETE]. - If the answer is wrong, you ask student to fix it.", - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = [gpt35], - }) + If the answer is wrong, you ask student to fix it.") + .RegisterMessageConnector() .RegisterMiddleware(async (msgs, option, agent, _) => { var reply = await agent.GenerateReplyAsync(msgs, option); @@ -39,14 +37,11 @@ public static async Task RunAsync() // create student agent // student agent will answer the math questions - var student = new AssistantAgent( + var student = new OpenAIChatAgent( + chatClient: gpt4oMini, name: "student", - systemMessage: "You are a student that answer question from teacher", - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = [gpt35], - }) + systemMessage: "You are a student that answer question from teacher") + .RegisterMessageConnector() .RegisterPrintMessage(); // start the conversation diff --git a/dotnet/sample/AutoGen.BasicSamples/Example03_Agent_FunctionCall.cs b/dotnet/sample/AutoGen.BasicSamples/Example03_Agent_FunctionCall.cs index 0ef8eaa48ae..94b67a94b14 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example03_Agent_FunctionCall.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example03_Agent_FunctionCall.cs @@ -1,9 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Example03_Agent_FunctionCall.cs -using AutoGen; using AutoGen.BasicSample; using AutoGen.Core; +using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; using FluentAssertions; /// <summary> @@ -45,33 +46,30 @@ public async Task<string> CalculateTax(int price, float taxRate) public static async Task RunAsync() { var instance = new Example03_Agent_FunctionCall(); - var gpt35 = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); // AutoGen makes use of AutoGen.SourceGenerator to automatically generate FunctionDefinition and FunctionCallWrapper for you. // The FunctionDefinition will be created based on function signature and XML documentation. // The return type of type-safe function needs to be Task<string>. And to get the best performance, please try only use primitive types and arrays of primitive types as parameters. 
- var config = new ConversableAgentConfig - { - Temperature = 0, - ConfigList = [gpt35], - FunctionContracts = new[] - { + var toolCallMiddleware = new FunctionCallMiddleware( + functions: [ instance.ConcatStringFunctionContract, instance.UpperCaseFunctionContract, instance.CalculateTaxFunctionContract, - }, - }; - - var agent = new AssistantAgent( - name: "agent", - systemMessage: "You are a helpful AI assistant", - llmConfig: config, + ], functionMap: new Dictionary<string, Func<string, Task<string>>> { - { nameof(ConcatString), instance.ConcatStringWrapper }, - { nameof(UpperCase), instance.UpperCaseWrapper }, - { nameof(CalculateTax), instance.CalculateTaxWrapper }, - }) + { nameof(instance.ConcatString), instance.ConcatStringWrapper }, + { nameof(instance.UpperCase), instance.UpperCaseWrapper }, + { nameof(instance.CalculateTax), instance.CalculateTaxWrapper }, + }); + + var agent = new OpenAIChatAgent( + chatClient: gpt4o, + name: "agent", + systemMessage: "You are a helpful AI assistant") + .RegisterMessageConnector() + .RegisterStreamingMiddleware(toolCallMiddleware) .RegisterPrintMessage(); // talk to the assistant agent diff --git a/dotnet/sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs b/dotnet/sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs index 21605992840..f90816d890e 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs @@ -1,11 +1,12 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Example04_Dynamic_GroupChat_Coding_Task.cs -using AutoGen; using AutoGen.BasicSample; using AutoGen.Core; using AutoGen.DotnetInteractive; +using AutoGen.DotnetInteractive.Extension; using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; using FluentAssertions; public partial class Example04_Dynamic_GroupChat_Coding_Task @@ -14,50 +15,32 @@ public static async Task RunAsync() { var instance = new Example04_Dynamic_GroupChat_Coding_Task(); - // setup dotnet interactive - var workDir = Path.Combine(Path.GetTempPath(), "InteractiveService"); - if (!Directory.Exists(workDir)) - { - Directory.CreateDirectory(workDir); - } - - using var service = new InteractiveService(workDir); - var dotnetInteractiveFunctions = new DotnetInteractiveFunction(service); - - var result = Path.Combine(workDir, "result.txt"); - if (File.Exists(result)) - { - File.Delete(result); - } - - await service.StartAsync(workDir, default); + var kernel = DotnetInteractiveKernelBuilder + .CreateDefaultInProcessKernelBuilder() + .AddPythonKernel("python3") + .Build(); - var gptConfig = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); - var helperAgent = new GPTAgent( - name: "helper", - systemMessage: "You are a helpful AI assistant", - temperature: 0f, - config: gptConfig); - - var groupAdmin = new GPTAgent( + var groupAdmin = new OpenAIChatAgent( + chatClient: gpt4o, name: "groupAdmin", - systemMessage: "You are the admin of the group chat", - temperature: 0f, - config: gptConfig) + systemMessage: "You are the admin of the group chat") + .RegisterMessageConnector() .RegisterPrintMessage(); - var userProxy = new UserProxyAgent(name: "user", defaultReply: GroupChatExtension.TERMINATE, humanInputMode: HumanInputMode.NEVER) + var userProxy = new DefaultReplyAgent(name: "user", defaultReply: GroupChatExtension.TERMINATE) .RegisterPrintMessage(); // Create admin agent - var admin = new AssistantAgent( + 
var admin = new OpenAIChatAgent( + chatClient: gpt4o, name: "admin", systemMessage: """ You are a manager who takes coding problem from user and resolve problem by splitting them into small tasks and assign each task to the most appropriate agent. Here's available agents who you can assign task to: - - coder: write dotnet code to resolve task - - runner: run dotnet code from coder + - coder: write python code to resolve task + - runner: run python code from coder The workflow is as follows: - You take the coding problem from user @@ -83,24 +66,12 @@ You are a manager who takes coding problem from user and resolve problem by spli Once the coding problem is resolved, summarize each steps and results and send the summary to the user using the following format: ```summary - { - "problem": "{coding problem}", - "steps": [ - { - "step": "{step}", - "result": "{result}" - } - ] - } + @user, <summary of the task> ``` Your reply must contain one of [task|ask|summary] to indicate the type of your message. - """, - llmConfig: new ConversableAgentConfig - { - Temperature = 0, - ConfigList = [gptConfig], - }) + """) + .RegisterMessageConnector() .RegisterPrintMessage(); // create coder agent @@ -108,30 +79,27 @@ Your reply must contain one of [task|ask|summary] to indicate the type of your m // The dotnet coder write dotnet code to resolve the task. // The code reviewer review the code block from coder's reply. // The nuget agent install nuget packages if there's any. - var coderAgent = new GPTAgent( + var coderAgent = new OpenAIChatAgent( name: "coder", - systemMessage: @"You act as dotnet coder, you write dotnet code to resolve task. Once you finish writing code, ask runner to run the code for you. + chatClient: gpt4o, + systemMessage: @"You act as python coder, you write python code to resolve task. Once you finish writing code, ask runner to run the code for you. Here're some rules to follow on writing dotnet code: -- put code between ```csharp and ``` -- When creating http client, use `var httpClient = new HttpClient()`. Don't use `using var httpClient = new HttpClient()` because it will cause error when running the code. -- Try to use `var` instead of explicit type. -- Try avoid using external library, use .NET Core library instead. -- Use top level statement to write code. +- put code between ```python and ``` +- Try avoid using external library - Always print out the result to console. Don't write code that doesn't print out anything. -If you need to install nuget packages, put nuget packages in the following format: -```nuget -nuget_package_name +Use the following format to install pip package: +```python +%pip install <package_name> ``` If your code is incorrect, Fix the error and send the code again. Here's some externel information - The link to mlnet repo is: https://github.com/dotnet/machinelearning. you don't need a token to use github pr api. Make sure to include a User-Agent header, otherwise github will reject it. 
-", - config: gptConfig, - temperature: 0.4f) +") + .RegisterMessageConnector() .RegisterPrintMessage(); // code reviewer agent will review if code block from coder's reply satisfy the following conditions: @@ -139,14 +107,13 @@ Your reply must contain one of [task|ask|summary] to indicate the type of your m // - The code block is csharp code block // - The code block is top level statement // - The code block is not using declaration - var codeReviewAgent = new GPTAgent( + var codeReviewAgent = new OpenAIChatAgent( + chatClient: gpt4o, name: "reviewer", systemMessage: """ You are a code reviewer who reviews code from coder. You need to check if the code satisfy the following conditions: - - The reply from coder contains at least one code block, e.g ```csharp and ``` - - There's only one code block and it's csharp code block - - The code block is not inside a main function. a.k.a top level statement - - The code block is not using declaration when creating http client + - The reply from coder contains at least one code block, e.g ```python and ``` + - There's only one code block and it's python code block You don't check the code style, only check if the code satisfy the above conditions. @@ -164,23 +131,40 @@ Your reply must contain one of [task|ask|summary] to indicate the type of your m result: REJECTED ``` - """, - config: gptConfig, - temperature: 0f) + """) + .RegisterMessageConnector() .RegisterPrintMessage(); // create runner agent // The runner agent will run the code block from coder's reply. // It runs dotnet code using dotnet interactive service hook. // It also truncate the output if the output is too long. - var runner = new AssistantAgent( + var runner = new DefaultReplyAgent( name: "runner", defaultReply: "No code available, coder, write code please") - .RegisterDotnetCodeBlockExectionHook(interactiveService: service) .RegisterMiddleware(async (msgs, option, agent, ct) => { var mostRecentCoderMessage = msgs.LastOrDefault(x => x.From == "coder") ?? 
throw new Exception("No coder message found"); - return await agent.GenerateReplyAsync(new[] { mostRecentCoderMessage }, option, ct); + + if (mostRecentCoderMessage.ExtractCodeBlock("```python", "```") is string code) + { + var result = await kernel.RunSubmitCodeCommandAsync(code, "python"); + // only keep the first 500 characters + if (result.Length > 500) + { + result = result.Substring(0, 500); + } + result = $""" + # [CODE_BLOCK_EXECUTION_RESULT] + {result} + """; + + return new TextMessage(Role.Assistant, result, from: agent.Name); + } + else + { + return await agent.GenerateReplyAsync(msgs, option, ct); + } }) .RegisterPrintMessage(); @@ -251,18 +235,27 @@ Your reply must contain one of [task|ask|summary] to indicate the type of your m workflow: workflow); // task 1: retrieve the most recent pr from mlnet and save it in result.txt - var groupChatManager = new GroupChatManager(groupChat); - await userProxy.SendAsync(groupChatManager, "Retrieve the most recent pr from mlnet and save it in result.txt", maxRound: 30); - File.Exists(result).Should().BeTrue(); - - // task 2: calculate the 39th fibonacci number - var answer = 63245986; - // clear the result file - File.Delete(result); + var task = """ + retrieve the most recent pr from mlnet and save it in result.txt + """; + var chatHistory = new List<IMessage> + { + new TextMessage(Role.Assistant, task) + { + From = userProxy.Name + } + }; + await foreach (var message in groupChat.SendAsync(chatHistory, maxRound: 10)) + { + if (message.From == admin.Name && message.GetContent().Contains("```summary")) + { + // Task complete! + break; + } + } - var conversationHistory = await userProxy.InitiateChatAsync(groupChatManager, "What's the 39th of fibonacci number? Save the result in result.txt", maxRound: 10); + // check if the result file is created + var result = "result.txt"; File.Exists(result).Should().BeTrue(); - var resultContent = File.ReadAllText(result); - resultContent.Should().Contain(answer.ToString()); } } diff --git a/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs b/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs index ba7b5d4bde4..e8dd86474e7 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs @@ -4,9 +4,9 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; using FluentAssertions; -using autogen = AutoGen.LLMConfigAPI; +using OpenAI; +using OpenAI.Images; public partial class Example05_Dalle_And_GPT4V { @@ -30,16 +30,12 @@ public async Task<string> GenerateImage(string prompt) // and return url. var option = new ImageGenerationOptions { - Size = ImageSize.Size1024x1024, - Style = ImageGenerationStyle.Vivid, - ImageCount = 1, - Prompt = prompt, - Quality = ImageGenerationQuality.Standard, - DeploymentName = "dall-e-3", + Size = GeneratedImageSize.W1024xH1024, + Style = GeneratedImageStyle.Vivid, }; - var imageResponse = await openAIClient.GetImageGenerationsAsync(option); - var imageUrl = imageResponse.Value.Data.First().Url.OriginalString; + var imageResponse = await openAIClient.GetImageClient("dall-e-3").GenerateImageAsync(prompt, option); + var imageUrl = imageResponse.Value.ImageUri.OriginalString; return $@"// ignore this line [IMAGE_GENERATION] The image is generated from prompt {prompt} @@ -57,8 +53,6 @@ public static async Task RunAsync() // get OpenAI Key and create config var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new Exception("Please set OPENAI_API_KEY environment variable."); - var gpt35Config = autogen.GetOpenAIConfigList(openAIKey, new[] { "gpt-3.5-turbo" }); - var gpt4vConfig = autogen.GetOpenAIConfigList(openAIKey, new[] { "gpt-4-vision-preview" }); var openAIClient = new OpenAIClient(openAIKey); var instance = new Example05_Dalle_And_GPT4V(openAIClient); var imagePath = Path.Combine("resource", "images", "background.png"); @@ -74,8 +68,7 @@ public static async Task RunAsync() { nameof(GenerateImage), instance.GenerateImageWrapper }, }); var dalleAgent = new OpenAIChatAgent( - openAIClient: openAIClient, - modelName: "gpt-3.5-turbo", + chatClient: openAIClient.GetChatClient("gpt-4o-mini"), name: "dalle", systemMessage: "You are a DALL-E agent that generate image from prompt, when conversation is terminated, return the most recent image url") .RegisterMessageConnector() @@ -110,9 +103,8 @@ public static async Task RunAsync() .RegisterPrintMessage(); var gpt4VAgent = new OpenAIChatAgent( - openAIClient: openAIClient, - name: "gpt4v", - modelName: "gpt-4-vision-preview", + chatClient: openAIClient.GetChatClient("gpt-4o-mini"), + name: "gpt-4o-mini", systemMessage: @"You are a critism that provide feedback to DALL-E agent. Carefully check the image generated by DALL-E agent and provide feedback. If the image satisfies the condition, then say [APPROVE]. diff --git a/dotnet/sample/AutoGen.BasicSamples/Example06_UserProxyAgent.cs b/dotnet/sample/AutoGen.BasicSamples/Example06_UserProxyAgent.cs index dd3b5a67192..e1349cb32a9 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example06_UserProxyAgent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example06_UserProxyAgent.cs @@ -2,6 +2,7 @@ // Example06_UserProxyAgent.cs using AutoGen.Core; using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; namespace AutoGen.BasicSample; @@ -9,12 +10,13 @@ public static class Example06_UserProxyAgent { public static async Task RunAsync() { - var gpt35 = LLMConfiguration.GetOpenAIGPT3_5_Turbo(); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); - var assistantAgent = new GPTAgent( + var assistantAgent = new OpenAIChatAgent( + chatClient: gpt4o, name: "assistant", - systemMessage: "You are an assistant that help user to do some tasks.", - config: gpt35) + systemMessage: "You are an assistant that help user to do some tasks.") + .RegisterMessageConnector() .RegisterPrintMessage(); // set human input mode to ALWAYS so that user always provide input diff --git a/dotnet/sample/AutoGen.BasicSamples/Example07_Dynamic_GroupChat_Calculate_Fibonacci.cs b/dotnet/sample/AutoGen.BasicSamples/Example07_Dynamic_GroupChat_Calculate_Fibonacci.cs index dd4fcada967..1f1315586a2 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example07_Dynamic_GroupChat_Calculate_Fibonacci.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example07_Dynamic_GroupChat_Calculate_Fibonacci.cs @@ -6,10 +6,11 @@ using AutoGen.BasicSample; using AutoGen.Core; using AutoGen.DotnetInteractive; +using AutoGen.DotnetInteractive.Extension; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; -using FluentAssertions; +using Microsoft.DotNet.Interactive; +using OpenAI.Chat; public partial class Example07_Dynamic_GroupChat_Calculate_Fibonacci { @@ -49,11 +50,10 @@ public async Task<string> ReviewCodeBlock( #endregion reviewer_function #region create_coder - public static async Task<IAgent> CreateCoderAgentAsync(OpenAIClient client, string deployModel) + public static async Task<IAgent> CreateCoderAgentAsync(ChatClient client) { var coder = new 
OpenAIChatAgent( - openAIClient: client, - modelName: deployModel, + chatClient: client, name: "coder", systemMessage: @"You act as dotnet coder, you write dotnet code to resolve task. Once you finish writing code, ask runner to run the code for you. @@ -80,12 +80,11 @@ public static async Task<IAgent> CreateCoderAgentAsync(OpenAIClient client, stri #endregion create_coder #region create_runner - public static async Task<IAgent> CreateRunnerAgentAsync(InteractiveService service) + public static async Task<IAgent> CreateRunnerAgentAsync(Kernel kernel) { var runner = new DefaultReplyAgent( name: "runner", defaultReply: "No code available.") - .RegisterDotnetCodeBlockExectionHook(interactiveService: service) .RegisterMiddleware(async (msgs, option, agent, _) => { if (msgs.Count() == 0 || msgs.All(msg => msg.From != "coder")) @@ -95,7 +94,24 @@ public static async Task<IAgent> CreateRunnerAgentAsync(InteractiveService servi else { var coderMsg = msgs.Last(msg => msg.From == "coder"); - return await agent.GenerateReplyAsync([coderMsg], option); + if (coderMsg.ExtractCodeBlock("```csharp", "```") is string code) + { + var codeResult = await kernel.RunSubmitCodeCommandAsync(code, "csharp"); + + codeResult = $""" + [RUNNER_RESULT] + {codeResult} + """; + + return new TextMessage(Role.Assistant, codeResult) + { + From = "runner", + }; + } + else + { + return new TextMessage(Role.Assistant, "No code available. Coder please write code"); + } } }) .RegisterPrintMessage(); @@ -105,11 +121,10 @@ public static async Task<IAgent> CreateRunnerAgentAsync(InteractiveService servi #endregion create_runner #region create_admin - public static async Task<IAgent> CreateAdminAsync(OpenAIClient client, string deployModel) + public static async Task<IAgent> CreateAdminAsync(ChatClient client) { var admin = new OpenAIChatAgent( - openAIClient: client, - modelName: deployModel, + chatClient: client, name: "admin", temperature: 0) .RegisterMessageConnector() @@ -120,9 +135,8 @@ public static async Task<IAgent> CreateAdminAsync(OpenAIClient client, string de #endregion create_admin #region create_reviewer - public static async Task<IAgent> CreateReviewerAgentAsync(OpenAIClient openAIClient, string deployModel) + public static async Task<IAgent> CreateReviewerAgentAsync(ChatClient chatClient) { - var gpt3Config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); var functions = new Example07_Dynamic_GroupChat_Calculate_Fibonacci(); var functionCallMiddleware = new FunctionCallMiddleware( functions: [functions.ReviewCodeBlockFunctionContract], @@ -131,10 +145,9 @@ public static async Task<IAgent> CreateReviewerAgentAsync(OpenAIClient openAICli { nameof(functions.ReviewCodeBlock), functions.ReviewCodeBlockWrapper }, }); var reviewer = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: chatClient, name: "code_reviewer", - systemMessage: @"You review code block from coder", - modelName: deployModel) + systemMessage: @"You review code block from coder") .RegisterMessageConnector() .RegisterStreamingMiddleware(functionCallMiddleware) .RegisterMiddleware(async (msgs, option, innerAgent, ct) => @@ -216,25 +229,17 @@ public static async Task<IAgent> CreateReviewerAgentAsync(OpenAIClient openAICli public static async Task RunWorkflowAsync() { long the39thFibonacciNumber = 63245986; - var workDir = Path.Combine(Path.GetTempPath(), "InteractiveService"); - if (!Directory.Exists(workDir)) - { - Directory.CreateDirectory(workDir); - } - - var config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var openaiClient = new 
OpenAIClient(new Uri(config.Endpoint), new Azure.AzureKeyCredential(config.ApiKey)); - - using var service = new InteractiveService(workDir); - var dotnetInteractiveFunctions = new DotnetInteractiveFunction(service); + var kernel = DotnetInteractiveKernelBuilder + .CreateDefaultInProcessKernelBuilder() + .Build(); - await service.StartAsync(workDir, default); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); #region create_workflow - var reviewer = await CreateReviewerAgentAsync(openaiClient, config.DeploymentName); - var coder = await CreateCoderAgentAsync(openaiClient, config.DeploymentName); - var runner = await CreateRunnerAgentAsync(service); - var admin = await CreateAdminAsync(openaiClient, config.DeploymentName); + var reviewer = await CreateReviewerAgentAsync(gpt4o); + var coder = await CreateCoderAgentAsync(gpt4o); + var runner = await CreateRunnerAgentAsync(kernel); + var admin = await CreateAdminAsync(gpt4o); var admin2CoderTransition = Transition.Create(admin, coder); var coder2ReviewerTransition = Transition.Create(coder, reviewer); @@ -305,21 +310,23 @@ public static async Task RunWorkflowAsync() runner, reviewer, ]); - + #endregion create_group_chat_with_workflow admin.SendIntroduction("Welcome to my group, work together to resolve my task", groupChat); coder.SendIntroduction("I will write dotnet code to resolve task", groupChat); reviewer.SendIntroduction("I will review dotnet code", groupChat); runner.SendIntroduction("I will run dotnet code once the review is done", groupChat); + var task = "What's the 39th of fibonacci number?"; - var groupChatManager = new GroupChatManager(groupChat); - var conversationHistory = await admin.InitiateChatAsync(groupChatManager, "What's the 39th of fibonacci number?", maxRound: 10); - #endregion create_group_chat_with_workflow - // the last message is from admin, which is the termination message - var lastMessage = conversationHistory.Last(); - lastMessage.From.Should().Be("admin"); - lastMessage.IsGroupChatTerminateMessage().Should().BeTrue(); - lastMessage.Should().BeOfType<TextMessage>(); - lastMessage.GetContent().Should().Contain(the39thFibonacciNumber.ToString()); + var taskMessage = new TextMessage(Role.User, task, from: admin.Name); + await foreach (var message in groupChat.SendAsync([taskMessage], maxRound: 10)) + { + // terminate the chat if the message is from runner and the run succeeded + if (message.From == "runner" && message.GetContent().Contains(the39thFibonacciNumber.ToString())) + { + Console.WriteLine($"The 39th of fibonacci number is {the39thFibonacciNumber}"); + break; + } + } } public static async Task RunAsync() @@ -331,18 +338,16 @@ public static async Task RunAsync() Directory.CreateDirectory(workDir); } - var config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var openaiClient = new OpenAIClient(new Uri(config.Endpoint), new Azure.AzureKeyCredential(config.ApiKey)); - - using var service = new InteractiveService(workDir); - var dotnetInteractiveFunctions = new DotnetInteractiveFunction(service); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); - await service.StartAsync(workDir, default); + var kernel = DotnetInteractiveKernelBuilder + .CreateDefaultInProcessKernelBuilder() + .Build(); #region create_group_chat - var reviewer = await CreateReviewerAgentAsync(openaiClient, config.DeploymentName); - var coder = await CreateCoderAgentAsync(openaiClient, config.DeploymentName); - var runner = await CreateRunnerAgentAsync(service); - var admin = await CreateAdminAsync(openaiClient, config.DeploymentName); + var
reviewer = await CreateReviewerAgentAsync(gpt4o); + var coder = await CreateCoderAgentAsync(gpt4o); + var runner = await CreateRunnerAgentAsync(kernel); + var admin = await CreateAdminAsync(gpt4o); var groupChat = new GroupChat( admin: admin, members: diff --git a/dotnet/sample/AutoGen.BasicSamples/Example08_LMStudio.cs b/dotnet/sample/AutoGen.BasicSamples/Example08_LMStudio.cs index cce33011762..e58454fdb5f 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example08_LMStudio.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example08_LMStudio.cs @@ -3,7 +3,9 @@ #region lmstudio_using_statements using AutoGen.Core; -using AutoGen.LMStudio; +using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; +using OpenAI; #endregion lmstudio_using_statements namespace AutoGen.BasicSample; @@ -13,8 +15,16 @@ public class Example08_LMStudio public static async Task RunAsync() { #region lmstudio_example_1 - var config = new LMStudioConfig("localhost", 1234); - var lmAgent = new LMStudioAgent("asssistant", config: config) + var endpoint = "http://localhost:1234"; + var openaiClient = new OpenAIClient("api-key", new OpenAIClientOptions + { + Endpoint = new Uri(endpoint), + }); + + var lmAgent = new OpenAIChatAgent( + chatClient: openaiClient.GetChatClient("<does-not-matter>"), + name: "assistant") + .RegisterMessageConnector() .RegisterPrintMessage(); await lmAgent.SendAsync("Can you write a piece of C# code to calculate 100th of fibonacci?"); diff --git a/dotnet/sample/AutoGen.BasicSamples/Example09_LMStudio_FunctionCall.cs b/dotnet/sample/AutoGen.BasicSamples/Example09_LMStudio_FunctionCall.cs deleted file mode 100644 index c9dda27d2e2..00000000000 --- a/dotnet/sample/AutoGen.BasicSamples/Example09_LMStudio_FunctionCall.cs +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Example09_LMStudio_FunctionCall.cs - -using System.Text.Json; -using System.Text.Json.Serialization; -using AutoGen.Core; -using AutoGen.LMStudio; -using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; - -namespace AutoGen.BasicSample; - -public class LLaMAFunctionCall -{ - [JsonPropertyName("name")] - public string Name { get; set; } - - [JsonPropertyName("arguments")] - public JsonElement Arguments { get; set; } -} - -public partial class Example09_LMStudio_FunctionCall -{ - /// <summary> - /// Get weather from location. - /// </summary> - /// <param name="location">location</param> - /// <param name="date">date. type is string</param> - [Function] - public async Task<string> GetWeather(string location, string date) - { - return $"[Function] The weather on {date} in {location} is sunny."; - } - - - /// <summary> - /// Search query on Google and return the results. 
- /// </summary> - /// <param name="query">search query</param> - [Function] - public async Task<string> GoogleSearch(string query) - { - return $"[Function] Here are the search results for {query}."; - } - - private static object SerializeFunctionDefinition(FunctionDefinition functionDefinition) - { - return new - { - type = "function", - function = new - { - name = functionDefinition.Name, - description = functionDefinition.Description, - parameters = functionDefinition.Parameters.ToObjectFromJson<object>(), - } - }; - } - - public static async Task RunAsync() - { - #region lmstudio_function_call_example - // This example has been verified to work with Trelis-Llama-2-7b-chat-hf-function-calling-v3 - var instance = new Example09_LMStudio_FunctionCall(); - var config = new LMStudioConfig("localhost", 1234); - var systemMessage = @$"You are a helpful AI assistant."; - - // Because the LM studio server doesn't support openai function call yet - // To simulate the function call, we can put the function call details in the system message - // And ask agent to response in function call object format using few-shot example - object[] functionList = - [ - SerializeFunctionDefinition(instance.GetWeatherFunctionContract.ToOpenAIFunctionDefinition()), - SerializeFunctionDefinition(instance.GetWeatherFunctionContract.ToOpenAIFunctionDefinition()) - ]; - var functionListString = JsonSerializer.Serialize(functionList, new JsonSerializerOptions { WriteIndented = true }); - var lmAgent = new LMStudioAgent( - name: "assistant", - systemMessage: @$" -You are a helpful AI assistant -You have access to the following functions. Use them if required: - -{functionListString}", - config: config) - .RegisterMiddleware(async (msgs, option, innerAgent, ct) => - { - // inject few-shot example to the message - var exampleGetWeather = new TextMessage(Role.User, "Get weather in London"); - var exampleAnswer = new TextMessage(Role.Assistant, "{\n \"name\": \"GetWeather\",\n \"arguments\": {\n \"city\": \"London\"\n }\n}", from: innerAgent.Name); - - msgs = new[] { exampleGetWeather, exampleAnswer }.Concat(msgs).ToArray(); - var reply = await innerAgent.GenerateReplyAsync(msgs, option, ct); - - // if reply is a function call, invoke function - var content = reply.GetContent(); - try - { - if (JsonSerializer.Deserialize<LLaMAFunctionCall>(content) is { } functionCall) - { - var arguments = JsonSerializer.Serialize(functionCall.Arguments); - // invoke function wrapper - if (functionCall.Name == instance.GetWeatherFunctionContract.Name) - { - var result = await instance.GetWeatherWrapper(arguments); - return new TextMessage(Role.Assistant, result); - } - else if (functionCall.Name == instance.GetWeatherFunctionContract.Name) - { - var result = await instance.GoogleSearchWrapper(arguments); - return new TextMessage(Role.Assistant, result); - } - else - { - throw new Exception($"Unknown function call: {functionCall.Name}"); - } - } - } - catch (JsonException) - { - // ignore - } - - return reply; - }) - .RegisterPrintMessage(); - - var userProxyAgent = new UserProxyAgent( - name: "user", - humanInputMode: HumanInputMode.ALWAYS); - - await userProxyAgent.SendAsync( - receiver: lmAgent, - "Search the names of the five largest stocks in the US by market cap "); - #endregion lmstudio_function_call_example - } -} diff --git a/dotnet/sample/AutoGen.BasicSamples/Example10_SemanticKernel.cs b/dotnet/sample/AutoGen.BasicSamples/Example10_SemanticKernel.cs index 61c341204ec..da7e54852f3 100644 --- 
a/dotnet/sample/AutoGen.BasicSamples/Example10_SemanticKernel.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example10_SemanticKernel.cs @@ -39,7 +39,7 @@ public class Example10_SemanticKernel public static async Task RunAsync() { var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var modelId = "gpt-3.5-turbo"; + var modelId = "gpt-4o-mini"; var builder = Kernel.CreateBuilder() .AddOpenAIChatCompletion(modelId: modelId, apiKey: openAIKey); var kernel = builder.Build(); diff --git a/dotnet/sample/AutoGen.BasicSamples/Example11_Sequential_GroupChat_Example.cs b/dotnet/sample/AutoGen.BasicSamples/Example11_Sequential_GroupChat_Example.cs index 00ff321082a..32aaa8c187b 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example11_Sequential_GroupChat_Example.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example11_Sequential_GroupChat_Example.cs @@ -7,7 +7,6 @@ using AutoGen.OpenAI.Extension; using AutoGen.SemanticKernel; using AutoGen.SemanticKernel.Extension; -using Azure.AI.OpenAI; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Plugins.Web; using Microsoft.SemanticKernel.Plugins.Web.Bing; @@ -52,15 +51,10 @@ You put the original search result between ```bing and ``` public static async Task<IAgent> CreateSummarizerAgentAsync() { #region CreateSummarizerAgent - var config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var apiKey = config.ApiKey; - var endPoint = new Uri(config.Endpoint); - - var openAIClient = new OpenAIClient(endPoint, new Azure.AzureKeyCredential(apiKey)); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var openAIClientAgent = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: gpt4o, name: "summarizer", - modelName: config.DeploymentName, systemMessage: "You summarize search result from bing in a short and concise manner"); return openAIClientAgent diff --git a/dotnet/sample/AutoGen.BasicSamples/Example12_TwoAgent_Fill_Application.cs b/dotnet/sample/AutoGen.BasicSamples/Example12_TwoAgent_Fill_Application.cs index b622a3e641e..69c2121cd80 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example12_TwoAgent_Fill_Application.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example12_TwoAgent_Fill_Application.cs @@ -5,7 +5,6 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; namespace AutoGen.BasicSample; @@ -69,11 +68,7 @@ public async Task<string> SaveProgress( public static async Task<IAgent> CreateSaveProgressAgent() { - var gpt3Config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var endPoint = gpt3Config.Endpoint ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); - var apiKey = gpt3Config.ApiKey ?? 
throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); - var openaiClient = new OpenAIClient(new Uri(endPoint), new Azure.AzureKeyCredential(apiKey)); - + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var instance = new TwoAgent_Fill_Application(); var functionCallConnector = new FunctionCallMiddleware( functions: [instance.SaveProgressFunctionContract], @@ -83,9 +78,8 @@ public static async Task<IAgent> CreateSaveProgressAgent() }); var chatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "application", - modelName: gpt3Config.DeploymentName, systemMessage: """You are a helpful application form assistant who saves progress while user fills application.""") .RegisterMessageConnector() .RegisterMiddleware(functionCallConnector) @@ -109,48 +103,23 @@ Save progress according to the most recent information provided by user. public static async Task<IAgent> CreateAssistantAgent() { - var gpt3Config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var endPoint = gpt3Config.Endpoint ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); - var apiKey = gpt3Config.ApiKey ?? throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); - var openaiClient = new OpenAIClient(new Uri(endPoint), new Azure.AzureKeyCredential(apiKey)); - + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var chatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "assistant", - modelName: gpt3Config.DeploymentName, systemMessage: """You create polite prompt to ask user provide missing information""") .RegisterMessageConnector() - .RegisterPrintMessage() - .RegisterMiddleware(async (msgs, option, agent, ct) => - { - var lastReply = msgs.Last() ?? throw new Exception("No reply found."); - var reply = await agent.GenerateReplyAsync(msgs, option, ct); - - // if application is complete, exit conversation by sending termination message - if (lastReply.GetContent().Contains("Application information is saved to database.")) - { - return new TextMessage(Role.Assistant, GroupChatExtension.TERMINATE, from: agent.Name); - } - else - { - return reply; - } - }); + .RegisterPrintMessage(); return chatAgent; } public static async Task<IAgent> CreateUserAgent() { - var gpt3Config = LLMConfiguration.GetAzureOpenAIGPT3_5_Turbo(); - var endPoint = gpt3Config.Endpoint ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); - var apiKey = gpt3Config.ApiKey ?? throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); - var openaiClient = new OpenAIClient(new Uri(endPoint), new Azure.AzureKeyCredential(apiKey)); - + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var chatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "user", - modelName: gpt3Config.DeploymentName, systemMessage: """ You are a user who is filling an application form. Simply provide the information as requested and answer the questions, don't do anything else. 
@@ -191,9 +160,13 @@ public static async Task RunAsync() var groupChatManager = new GroupChatManager(groupChat); var initialMessage = await assistantAgent.SendAsync("Generate a greeting meesage for user and start the conversation by asking what's their name."); - var chatHistory = await userAgent.SendAsync(groupChatManager, [initialMessage], maxRound: 30); - - var lastMessage = chatHistory.Last(); - Console.WriteLine(lastMessage.GetContent()); + var chatHistory = new List<IMessage> { initialMessage }; + await foreach (var msg in userAgent.SendAsync(groupChatManager, chatHistory, maxRound: 30)) + { + if (msg.GetContent().ToLower().Contains("application information is saved to database.") is true) + { + break; + } + } } } diff --git a/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs b/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs index dee9915511d..4a4b10ae3d7 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs @@ -3,6 +3,7 @@ using AutoGen.Core; using AutoGen.OpenAI; +using AutoGen.OpenAI.Extension; namespace AutoGen.BasicSample; @@ -27,14 +28,14 @@ public static class Example15_GPT4V_BinaryDataImageMessage public static async Task RunAsync() { - var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var openAiConfig = new OpenAIConfig(openAIKey, "gpt-4o"); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); - var visionAgent = new GPTAgent( + var visionAgent = new OpenAIChatAgent( + chatClient: gpt4o, name: "gpt", systemMessage: "You are a helpful AI assistant", - config: openAiConfig, temperature: 0) + .RegisterMessageConnector() .RegisterPrintMessage(); List<IMessage> messages = diff --git a/dotnet/sample/AutoGen.BasicSamples/Example17_ReActAgent.cs b/dotnet/sample/AutoGen.BasicSamples/Example17_ReActAgent.cs index f598ebbf7c4..170736bf22e 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example17_ReActAgent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example17_ReActAgent.cs @@ -4,14 +4,14 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; +using OpenAI; +using OpenAI.Chat; namespace AutoGen.BasicSample; public class OpenAIReActAgent : IAgent { - private readonly OpenAIClient _client; - private readonly string modelName = "gpt-3.5-turbo"; + private readonly ChatClient _client; private readonly FunctionContract[] tools; private readonly Dictionary<string, Func<string, Task<string>>> toolExecutors = new(); private readonly IAgent reasoner; @@ -39,16 +39,15 @@ public class OpenAIReActAgent : IAgent Begin! 
Question: {input}"; - public OpenAIReActAgent(OpenAIClient client, string modelName, string name, FunctionContract[] tools, Dictionary<string, Func<string, Task<string>>> toolExecutors) + public OpenAIReActAgent(ChatClient client, string name, FunctionContract[] tools, Dictionary<string, Func<string, Task<string>>> toolExecutors) { _client = client; this.Name = name; - this.modelName = modelName; this.tools = tools; this.toolExecutors = toolExecutors; this.reasoner = CreateReasoner(); this.actor = CreateActor(); - this.helper = new OpenAIChatAgent(client, "helper", modelName) + this.helper = new OpenAIChatAgent(client, "helper") .RegisterMessageConnector(); } @@ -106,8 +105,7 @@ private string CreateReActPrompt(string input) private IAgent CreateReasoner() { return new OpenAIChatAgent( - openAIClient: _client, - modelName: modelName, + chatClient: _client, name: "reasoner") .RegisterMessageConnector() .RegisterPrintMessage(); @@ -117,8 +115,7 @@ private IAgent CreateActor() { var functionCallMiddleware = new FunctionCallMiddleware(tools, toolExecutors); return new OpenAIChatAgent( - openAIClient: _client, - modelName: modelName, + chatClient: _client, name: "actor") .RegisterMessageConnector() .RegisterMiddleware(functionCallMiddleware) @@ -166,9 +163,9 @@ public static async Task RunAsync() var modelName = "gpt-4-turbo"; var tools = new Tools(); var openAIClient = new OpenAIClient(openAIKey); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var reactAgent = new OpenAIReActAgent( - client: openAIClient, - modelName: modelName, + client: openAIClient.GetChatClient(modelName), name: "react-agent", tools: [tools.GetLocalizationFunctionContract, tools.GetDateTodayFunctionContract, tools.WeatherReportFunctionContract], toolExecutors: new Dictionary<string, Func<string, Task<string>>> diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Agent_Middleware.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Agent_Middleware.cs index 57f8ab4075c..cf97af13467 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Agent_Middleware.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Agent_Middleware.cs @@ -5,9 +5,9 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; #endregion Using using FluentAssertions; +using OpenAI.Chat; namespace AutoGen.BasicSample; @@ -16,20 +16,17 @@ public class Agent_Middleware public static async Task RunTokenCountAsync() { #region Create_Agent - var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new InvalidOperationException("Please set the environment variable OPENAI_API_KEY"); - var model = "gpt-3.5-turbo"; - var openaiClient = new OpenAIClient(apiKey); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var openaiMessageConnector = new OpenAIChatRequestMessageConnector(); var totalTokenCount = 0; var agent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "agent", - modelName: model, systemMessage: "You are a helpful AI assistant") .RegisterMiddleware(async (messages, option, innerAgent, ct) => { var reply = await innerAgent.GenerateReplyAsync(messages, option, ct); - if (reply is MessageEnvelope<ChatCompletions> chatCompletions) + if (reply is MessageEnvelope<ChatCompletion> chatCompletions) { var tokenCount = chatCompletions.Content.Usage.TotalTokens; totalTokenCount += tokenCount; @@ -53,21 +50,17 @@ public static async Task RunTokenCountAsync() public static async Task RunRagTaskAsync() { #region Create_Agent - var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("Please set the environment variable OPENAI_API_KEY"); - var model = "gpt-3.5-turbo"; - var openaiClient = new OpenAIClient(apiKey); - var openaiMessageConnector = new OpenAIChatRequestMessageConnector(); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var agent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "agent", - modelName: model, systemMessage: "You are a helpful AI assistant") .RegisterMessageConnector() .RegisterMiddleware(async (messages, option, innerAgent, ct) => { var today = DateTime.UtcNow; var todayMessage = new TextMessage(Role.System, $"Today is {today:yyyy-MM-dd}"); - messages = messages.Concat(new[] { todayMessage }); + messages = messages.Concat([todayMessage]); return await innerAgent.GenerateReplyAsync(messages, option, ct); }) .RegisterPrintMessage(); diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Chat_With_Agent.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Chat_With_Agent.cs index 0ac1cda7528..b2cc228496d 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Chat_With_Agent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Chat_With_Agent.cs @@ -5,7 +5,6 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; #endregion Using using FluentAssertions; @@ -17,13 +16,10 @@ public class Chat_With_Agent public static async Task RunAsync() { #region Create_Agent - var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-3.5-turbo"; - var openaiClient = new OpenAIClient(apiKey); + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var agent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "agent", - modelName: model, systemMessage: "You are a helpful AI assistant") .RegisterMessageConnector(); // convert OpenAI message to AutoGen message #endregion Create_Agent diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Dynamic_Group_Chat.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Dynamic_Group_Chat.cs index 7acaae4b1f8..dadc295e308 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Dynamic_Group_Chat.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Dynamic_Group_Chat.cs @@ -6,8 +6,8 @@ using AutoGen.OpenAI.Extension; using AutoGen.SemanticKernel; using AutoGen.SemanticKernel.Extension; -using Azure.AI.OpenAI; using Microsoft.SemanticKernel; +using OpenAI; namespace AutoGen.BasicSample; @@ -16,14 +16,13 @@ public class Dynamic_Group_Chat public static async Task RunAsync() { var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-3.5-turbo"; + var model = "gpt-4o-mini"; #region Create_Coder var openaiClient = new OpenAIClient(apiKey); var coder = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: openaiClient.GetChatClient(model), name: "coder", - modelName: model, systemMessage: "You are a C# coder, when writing csharp code, please put the code between ```csharp and ```") .RegisterMessageConnector() // convert OpenAI message to AutoGen message .RegisterPrintMessage(); // print the message content @@ -49,9 +48,8 @@ public static async Task RunAsync() #region Create_Group var admin = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "admin", - modelName: model) + chatClient: openaiClient.GetChatClient(model), + name: "admin") .RegisterMessageConnector(); // convert OpenAI message to AutoGen message var group = new GroupChat( diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/FSM_Group_Chat.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/FSM_Group_Chat.cs index 59c0aa9ca88..093d0c77ce6 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/FSM_Group_Chat.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/FSM_Group_Chat.cs @@ -6,7 +6,8 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; +using OpenAI; +using OpenAI.Chat; #endregion Using namespace AutoGen.BasicSample; @@ -74,7 +75,7 @@ public async Task<string> SaveProgress( public class FSM_Group_Chat { - public static async Task<IAgent> CreateSaveProgressAgent(OpenAIClient client, string model) + public static async Task<IAgent> CreateSaveProgressAgent(ChatClient client) { #region Create_Save_Progress_Agent var tool = new FillFormTool(); @@ -86,9 +87,8 @@ public static async Task<IAgent> CreateSaveProgressAgent(OpenAIClient client, st }); var chatAgent = new OpenAIChatAgent( - openAIClient: client, + chatClient: client, name: "application", - modelName: model, systemMessage: """You are a helpful application form assistant who saves progress while user fills application.""") .RegisterMessageConnector() .RegisterMiddleware(functionCallMiddleware) @@ -111,42 +111,25 @@ Save progress according to the most recent information provided by user. 
return chatAgent; } - public static async Task<IAgent> CreateAssistantAgent(OpenAIClient openaiClient, string model) + public static async Task<IAgent> CreateAssistantAgent(ChatClient chatClient) { #region Create_Assistant_Agent var chatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: chatClient, name: "assistant", - modelName: model, systemMessage: """You create polite prompt to ask user provide missing information""") .RegisterMessageConnector() - .RegisterPrintMessage() - .RegisterMiddleware(async (msgs, option, agent, ct) => - { - var lastReply = msgs.Last() ?? throw new Exception("No reply found."); - var reply = await agent.GenerateReplyAsync(msgs, option, ct); - - // if application is complete, exit conversation by sending termination message - if (lastReply.GetContent()?.Contains("Application information is saved to database.") is true) - { - return new TextMessage(Role.Assistant, GroupChatExtension.TERMINATE, from: agent.Name); - } - else - { - return reply; - } - }); + .RegisterPrintMessage(); #endregion Create_Assistant_Agent return chatAgent; } - public static async Task<IAgent> CreateUserAgent(OpenAIClient openaiClient, string model) + public static async Task<IAgent> CreateUserAgent(ChatClient chatClient) { #region Create_User_Agent var chatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: chatClient, name: "user", - modelName: model, systemMessage: """ You are a user who is filling an application form. Simply provide the information as requested and answer the questions, don't do anything else. @@ -166,11 +149,12 @@ public static async Task<IAgent> CreateUserAgent(OpenAIClient openaiClient, stri public static async Task RunAsync() { var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-3.5-turbo"; + var model = "gpt-4o-mini"; var openaiClient = new OpenAIClient(apiKey); - var applicationAgent = await CreateSaveProgressAgent(openaiClient, model); - var assistantAgent = await CreateAssistantAgent(openaiClient, model); - var userAgent = await CreateUserAgent(openaiClient, model); + var chatClient = openaiClient.GetChatClient(model); + var applicationAgent = await CreateSaveProgressAgent(chatClient); + var assistantAgent = await CreateAssistantAgent(chatClient); + var userAgent = await CreateUserAgent(chatClient); #region Create_Graph var userToApplicationTransition = Transition.Create(userAgent, applicationAgent); @@ -193,9 +177,13 @@ public static async Task RunAsync() var initialMessage = await assistantAgent.SendAsync("Generate a greeting meesage for user and start the conversation by asking what's their name."); - var chatHistory = await userAgent.SendMessageToGroupAsync(groupChat, [initialMessage], maxRound: 30); - - var lastMessage = chatHistory.Last(); - Console.WriteLine(lastMessage.GetContent()); + var chatHistory = new List<IMessage> { initialMessage }; + await foreach (var msg in groupChat.SendAsync(chatHistory, maxRound: 30)) + { + if (msg.GetContent().ToLower().Contains("application information is saved to database.") is true) + { + break; + } + } } } diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Image_Chat_With_Agent.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Image_Chat_With_Agent.cs index 5b94a238bbe..e993b3d51f1 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Image_Chat_With_Agent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Image_Chat_With_Agent.cs @@ -5,7 +5,6 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; #endregion Using using FluentAssertions; @@ -16,14 +15,10 @@ public class Image_Chat_With_Agent public static async Task RunAsync() { #region Create_Agent - var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-4o"; // The model needs to support multimodal inputs - var openaiClient = new OpenAIClient(apiKey); - + var gpt4o = LLMConfiguration.GetOpenAIGPT4o_mini(); var agent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: gpt4o, name: "agent", - modelName: model, systemMessage: "You are a helpful AI assistant") .RegisterMessageConnector() // convert OpenAI message to AutoGen message .RegisterPrintMessage(); diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Streaming_Tool_Call.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Streaming_Tool_Call.cs index 48ebd127b56..d5cb196f94f 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Streaming_Tool_Call.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Streaming_Tool_Call.cs @@ -4,8 +4,8 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; using FluentAssertions; +using OpenAI; namespace AutoGen.BasicSample.GettingStart; @@ -28,12 +28,11 @@ public static async Task RunAsync() #region Create_Agent var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-4o"; + var model = "gpt-4o-mini"; var openaiClient = new OpenAIClient(apiKey); var agent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: openaiClient.GetChatClient(model), name: "agent", - modelName: model, systemMessage: "You are a helpful AI assistant") .RegisterMessageConnector() .RegisterStreamingMiddleware(autoInvokeMiddleware) diff --git a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Use_Tools_With_Agent.cs b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Use_Tools_With_Agent.cs index b441fe389da..21a5df4c2ec 100644 --- a/dotnet/sample/AutoGen.BasicSamples/GettingStart/Use_Tools_With_Agent.cs +++ b/dotnet/sample/AutoGen.BasicSamples/GettingStart/Use_Tools_With_Agent.cs @@ -5,9 +5,9 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; #endregion Using using FluentAssertions; +using OpenAI; namespace AutoGen.BasicSample; @@ -50,12 +50,11 @@ public static async Task RunAsync() #region Create_Agent var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-3.5-turbo"; + var model = "gpt-4o-mini"; var openaiClient = new OpenAIClient(apiKey); var agent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: openaiClient.GetChatClient(model), name: "agent", - modelName: model, systemMessage: "You are a helpful AI assistant") .RegisterMessageConnector(); // convert OpenAI message to AutoGen message #endregion Create_Agent diff --git a/dotnet/sample/AutoGen.BasicSamples/LLMConfiguration.cs b/dotnet/sample/AutoGen.BasicSamples/LLMConfiguration.cs index e492569cdc3..26d9668792e 100644 --- a/dotnet/sample/AutoGen.BasicSamples/LLMConfiguration.cs +++ b/dotnet/sample/AutoGen.BasicSamples/LLMConfiguration.cs @@ -1,25 +1,19 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // LLMConfiguration.cs -using AutoGen.OpenAI; +using OpenAI; +using OpenAI.Chat; namespace AutoGen.BasicSample; internal static class LLMConfiguration { - public static OpenAIConfig GetOpenAIGPT3_5_Turbo() + public static ChatClient GetOpenAIGPT4o_mini() { var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var modelId = "gpt-3.5-turbo"; - return new OpenAIConfig(openAIKey, modelId); - } - - public static OpenAIConfig GetOpenAIGPT4() - { - var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var modelId = "gpt-4"; + var modelId = "gpt-4o-mini"; - return new OpenAIConfig(openAIKey, modelId); + return new OpenAIClient(openAIKey).GetChatClient(modelId); } public static AzureOpenAIConfig GetAzureOpenAIGPT3_5_Turbo(string? deployName = null) @@ -29,12 +23,4 @@ public static AzureOpenAIConfig GetAzureOpenAIGPT3_5_Turbo(string? deployName = deployName = deployName ?? Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); return new AzureOpenAIConfig(endpoint, deployName, azureOpenAIKey); } - - public static AzureOpenAIConfig GetAzureOpenAIGPT4(string deployName = "gpt-4") - { - var azureOpenAIKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? 
throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); - var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); - - return new AzureOpenAIConfig(endpoint, deployName, azureOpenAIKey); - } } diff --git a/dotnet/sample/AutoGen.BasicSamples/Program.cs b/dotnet/sample/AutoGen.BasicSamples/Program.cs index b48e2be4aa1..8817a3df36e 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Program.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Program.cs @@ -1,6 +1,59 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Program.cs +//await Example07_Dynamic_GroupChat_Calculate_Fibonacci.RunAsync(); + using AutoGen.BasicSample; -Console.ReadLine(); -await Example17_ReActAgent.RunAsync(); + +//Define allSamples collection for all examples +List<Tuple<string, Func<Task>>> allSamples = new List<Tuple<string, Func<Task>>>(); + +// When a new sample is created please add them to the allSamples collection +allSamples.Add(new Tuple<string, Func<Task>>("Assistant Agent", async () => { await Example01_AssistantAgent.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Two-agent Math Chat", async () => { await Example02_TwoAgent_MathChat.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Agent Function Call", async () => { await Example03_Agent_FunctionCall.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Dynamic Group Chat Coding Task", async () => { await Example04_Dynamic_GroupChat_Coding_Task.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("DALL-E and GPT4v", async () => { await Example05_Dalle_And_GPT4V.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("User Proxy Agent", async () => { await Example06_UserProxyAgent.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Dynamic Group Chat - Calculate Fibonacci", async () => { await Example07_Dynamic_GroupChat_Calculate_Fibonacci.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("LM Studio", async () => { await Example08_LMStudio.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Semantic Kernel", async () => { await Example10_SemanticKernel.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Sequential Group Chat", async () => { await Sequential_GroupChat_Example.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Two Agent - Fill Application", async () => { await TwoAgent_Fill_Application.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("Mistal Client Agent - Token Count", async () => { await Example14_MistralClientAgent_TokenCount.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("GPT4v - Binary Data Image", async () => { await Example15_GPT4V_BinaryDataImageMessage.RunAsync(); })); +allSamples.Add(new Tuple<string, Func<Task>>("ReAct Agent", async () => { await Example17_ReActAgent.RunAsync(); })); + + +int idx = 1; +Dictionary<int, Tuple<string, Func<Task>>> map = new Dictionary<int, Tuple<string, Func<Task>>>(); +Console.WriteLine("Available Examples:\n\n"); +foreach (Tuple<string, Func<Task>> sample in allSamples) +{ + map.Add(idx, sample); + Console.WriteLine("{0}. 
{1}", idx++, sample.Item1); +} + +Console.WriteLine("\n\nEnter your selection:"); + +while (true) +{ + var input = Console.ReadLine(); + if (input == "exit") + { + break; + } + int val = Convert.ToInt32(input); + if (!map.ContainsKey(val)) + { + Console.WriteLine("Invalid choice"); + } + else + { + Console.WriteLine("\nRunning {0}", map[val].Item1); + await map[val].Item2.Invoke(); + } +} + + + diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/AutoGen.OpenAI.Sample.csproj b/dotnet/sample/AutoGen.OpenAI.Sample/AutoGen.OpenAI.Sample.csproj index 49c0e21c9ec..fcbbb834fc6 100644 --- a/dotnet/sample/AutoGen.OpenAI.Sample/AutoGen.OpenAI.Sample.csproj +++ b/dotnet/sample/AutoGen.OpenAI.Sample/AutoGen.OpenAI.Sample.csproj @@ -14,8 +14,9 @@ <ProjectReference Include="..\..\src\AutoGen.DotnetInteractive\AutoGen.DotnetInteractive.csproj" /> <ProjectReference Include="..\..\src\AutoGen.Ollama\AutoGen.Ollama.csproj" /> <ProjectReference Include="..\..\src\AutoGen.SourceGenerator\AutoGen.SourceGenerator.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" /> - <ProjectReference Include="..\..\src\AutoGen\AutoGen.csproj" /> + <ProjectReference Include="..\..\src\AutoGen.OpenAI\AutoGen.OpenAI.csproj" /> <PackageReference Include="FluentAssertions" Version="$(FluentAssertionVersion)" /> + <PackageReference Include="Azure.AI.OpenAI" Version="$(AzureOpenAIV2Version)" /> </ItemGroup> </Project> diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Azure_OpenAI.cs b/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Azure_OpenAI.cs new file mode 100644 index 00000000000..dafe2e31485 --- /dev/null +++ b/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Azure_OpenAI.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Connect_To_Azure_OpenAI.cs + +#region using_statement +using AutoGen.Core; +using AutoGen.OpenAI.Extension; +using Azure; +using Azure.AI.OpenAI; +#endregion using_statement + +namespace AutoGen.OpenAI.Sample; + +public class Connect_To_Azure_OpenAI +{ + public static async Task RunAsync() + { + #region create_agent + var apiKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new InvalidOperationException("Please set environment variable AZURE_OPENAI_API_KEY"); + var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("Please set environment variable AZURE_OPENAI_ENDPOINT"); + var model = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? "gpt-4o-mini"; + + // Use AzureOpenAIClient to connect to openai model deployed on azure. 
+ // The AzureOpenAIClient comes from Azure.AI.OpenAI package + var openAIClient = new AzureOpenAIClient(new Uri(endpoint), new AzureKeyCredential(apiKey)); + + var agent = new OpenAIChatAgent( + chatClient: openAIClient.GetChatClient(model), + name: "assistant", + systemMessage: "You are a helpful assistant designed to output JSON.", + seed: 0) + .RegisterMessageConnector() + .RegisterPrintMessage(); + #endregion create_agent + + #region send_message + await agent.SendAsync("Can you write a piece of C# code to calculate 100th of fibonacci?"); + #endregion send_message + } +} diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Ollama.cs b/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Ollama.cs index 3823de2a528..2bb10e97841 100644 --- a/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Ollama.cs +++ b/dotnet/sample/AutoGen.OpenAI.Sample/Connect_To_Ollama.cs @@ -4,51 +4,27 @@ #region using_statement using AutoGen.Core; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; -using Azure.Core.Pipeline; +using OpenAI; #endregion using_statement namespace AutoGen.OpenAI.Sample; -#region CustomHttpClientHandler -public sealed class CustomHttpClientHandler : HttpClientHandler -{ - private string _modelServiceUrl; - - public CustomHttpClientHandler(string modelServiceUrl) - { - _modelServiceUrl = modelServiceUrl; - } - - protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) - { - request.RequestUri = new Uri($"{_modelServiceUrl}{request.RequestUri.PathAndQuery}"); - - return base.SendAsync(request, cancellationToken); - } -} -#endregion CustomHttpClientHandler - public class Connect_To_Ollama { public static async Task RunAsync() { #region create_agent - using var client = new HttpClient(new CustomHttpClientHandler("http://localhost:11434")); - var option = new OpenAIClientOptions(OpenAIClientOptions.ServiceVersion.V2024_04_01_Preview) - { - Transport = new HttpClientTransport(client), - }; - // api-key is not required for local server // so you can use any string here - var openAIClient = new OpenAIClient("api-key", option); + var openAIClient = new OpenAIClient("api-key", new OpenAIClientOptions + { + Endpoint = new Uri("http://localhost:11434/v1/"), // remember to add /v1/ at the end to connect to Ollama openai server + }); var model = "llama3"; var agent = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: openAIClient.GetChatClient(model), name: "assistant", - modelName: model, systemMessage: "You are a helpful assistant designed to output JSON.", seed: 0) .RegisterMessageConnector() diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/Program.cs b/dotnet/sample/AutoGen.OpenAI.Sample/Program.cs index 5a38a3ff03b..c71f152d037 100644 --- a/dotnet/sample/AutoGen.OpenAI.Sample/Program.cs +++ b/dotnet/sample/AutoGen.OpenAI.Sample/Program.cs @@ -3,4 +3,4 @@ using AutoGen.OpenAI.Sample; -Tool_Call_With_Ollama_And_LiteLLM.RunAsync().Wait(); +Structural_Output.RunAsync().Wait(); diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/Structural_Output.cs b/dotnet/sample/AutoGen.OpenAI.Sample/Structural_Output.cs new file mode 100644 index 00000000000..e562d7223a6 --- /dev/null +++ b/dotnet/sample/AutoGen.OpenAI.Sample/Structural_Output.cs @@ -0,0 +1,90 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Structural_Output.cs + +using System.Text.Json; +using System.Text.Json.Serialization; +using AutoGen.Core; +using AutoGen.OpenAI.Extension; +using FluentAssertions; +using Json.Schema; +using Json.Schema.Generation; +using OpenAI; +using OpenAI.Chat; + +namespace AutoGen.OpenAI.Sample; + +internal class Structural_Output +{ + public static async Task RunAsync() + { + #region create_agent + var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); + var model = "gpt-4o-mini"; + + var schemaBuilder = new JsonSchemaBuilder().FromType<Person>(); + var schema = schemaBuilder.Build(); + + var personSchemaFormat = ChatResponseFormat.CreateJsonSchemaFormat( + name: "Person", + jsonSchema: BinaryData.FromObjectAsJson(schema), + description: "Person schema"); + + var openAIClient = new OpenAIClient(apiKey); + var openAIClientAgent = new OpenAIChatAgent( + chatClient: openAIClient.GetChatClient(model), + name: "assistant", + systemMessage: "You are a helpful assistant", + responseFormat: personSchemaFormat) // structural output by passing schema to response format + .RegisterMessageConnector() + .RegisterPrintMessage(); + #endregion create_agent + + #region chat_with_agent + var reply = await openAIClientAgent.SendAsync("My name is John, I am 25 years old, and I live in Seattle. I like to play soccer and read books."); + + var person = JsonSerializer.Deserialize<Person>(reply.GetContent()); + Console.WriteLine($"Name: {person.Name}"); + Console.WriteLine($"Age: {person.Age}"); + + if (!string.IsNullOrEmpty(person.Address)) + { + Console.WriteLine($"Address: {person.Address}"); + } + + Console.WriteLine("Done."); + #endregion chat_with_agent + + person.Name.Should().Be("John"); + person.Age.Should().Be(25); + person.Address.Should().BeNullOrEmpty(); + person.City.Should().Be("Seattle"); + person.Hobbies.Count.Should().Be(2); + } +} + +#region person_class +public class Person +{ + [JsonPropertyName("name")] + [Description("Name of the person")] + [Required] + public string Name { get; set; } + + [JsonPropertyName("age")] + [Description("Age of the person")] + [Required] + public int Age { get; set; } + + [JsonPropertyName("city")] + [Description("City of the person")] + public string? City { get; set; } + + [JsonPropertyName("address")] + [Description("Address of the person")] + public string? Address { get; set; } + + [JsonPropertyName("hobbies")] + [Description("Hobbies of the person")] + public List<string>? 
Hobbies { get; set; } +} +#endregion person_class diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/Tool_Call_With_Ollama_And_LiteLLM.cs b/dotnet/sample/AutoGen.OpenAI.Sample/Tool_Call_With_Ollama_And_LiteLLM.cs index b0b0adc0e6f..ed43c628a67 100644 --- a/dotnet/sample/AutoGen.OpenAI.Sample/Tool_Call_With_Ollama_And_LiteLLM.cs +++ b/dotnet/sample/AutoGen.OpenAI.Sample/Tool_Call_With_Ollama_And_LiteLLM.cs @@ -3,8 +3,7 @@ using AutoGen.Core; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; -using Azure.Core.Pipeline; +using OpenAI; namespace AutoGen.OpenAI.Sample; @@ -43,20 +42,17 @@ public static async Task RunAsync() #endregion Create_tools #region Create_Agent var liteLLMUrl = "http://localhost:4000"; - using var httpClient = new HttpClient(new CustomHttpClientHandler(liteLLMUrl)); - var option = new OpenAIClientOptions(OpenAIClientOptions.ServiceVersion.V2024_04_01_Preview) - { - Transport = new HttpClientTransport(httpClient), - }; // api-key is not required for local server // so you can use any string here - var openAIClient = new OpenAIClient("api-key", option); + var openAIClient = new OpenAIClient("api-key", new OpenAIClientOptions + { + Endpoint = new Uri("http://localhost:4000"), + }); var agent = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: openAIClient.GetChatClient("dolphincoder:latest"), name: "assistant", - modelName: "dolphincoder:latest", systemMessage: "You are a helpful AI assistant") .RegisterMessageConnector() .RegisterMiddleware(functionMiddleware) diff --git a/dotnet/sample/AutoGen.OpenAI.Sample/Use_Json_Mode.cs b/dotnet/sample/AutoGen.OpenAI.Sample/Use_Json_Mode.cs index d92983c5050..392796d819f 100644 --- a/dotnet/sample/AutoGen.OpenAI.Sample/Use_Json_Mode.cs +++ b/dotnet/sample/AutoGen.OpenAI.Sample/Use_Json_Mode.cs @@ -6,8 +6,9 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; using FluentAssertions; +using OpenAI; +using OpenAI.Chat; namespace AutoGen.BasicSample; @@ -17,16 +18,15 @@ public static async Task RunAsync() { #region create_agent var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new Exception("Please set OPENAI_API_KEY environment variable."); - var model = "gpt-3.5-turbo"; + var model = "gpt-4o-mini"; var openAIClient = new OpenAIClient(apiKey); var openAIClientAgent = new OpenAIChatAgent( - openAIClient: openAIClient, + chatClient: openAIClient.GetChatClient(model), name: "assistant", - modelName: model, systemMessage: "You are a helpful assistant designed to output JSON.", seed: 0, // explicitly set a seed to enable deterministic output - responseFormat: ChatCompletionsResponseFormat.JsonObject) // set response format to JSON object to enable JSON mode + responseFormat: ChatResponseFormat.JsonObject) // set response format to JSON object to enable JSON mode .RegisterMessageConnector() .RegisterPrintMessage(); #endregion create_agent diff --git a/dotnet/sample/AutoGen.SemanticKernel.Sample/AutoGen.SemanticKernel.Sample.csproj b/dotnet/sample/AutoGen.SemanticKernel.Sample/AutoGen.SemanticKernel.Sample.csproj index df1064e18c4..45514431368 100644 --- a/dotnet/sample/AutoGen.SemanticKernel.Sample/AutoGen.SemanticKernel.Sample.csproj +++ b/dotnet/sample/AutoGen.SemanticKernel.Sample/AutoGen.SemanticKernel.Sample.csproj @@ -9,8 +9,9 @@ </PropertyGroup> <ItemGroup> + <ProjectReference Include="..\..\src\AutoGen.OpenAI\AutoGen.OpenAI.csproj" /> + <ProjectReference Include="..\..\src\AutoGen.SemanticKernel\AutoGen.SemanticKernel.csproj" /> <ProjectReference Include="..\..\src\AutoGen.SourceGenerator\AutoGen.SourceGenerator.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" /> - <ProjectReference Include="..\AutoGen.BasicSamples\AutoGen.BasicSample.csproj" /> <PackageReference Include="Microsoft.SemanticKernel.Plugins.Web" Version="$(SemanticKernelExperimentalVersion)" /> </ItemGroup> diff --git a/dotnet/sample/AutoGen.SemanticKernel.Sample/Use_Kernel_Functions_With_Other_Agent.cs b/dotnet/sample/AutoGen.SemanticKernel.Sample/Use_Kernel_Functions_With_Other_Agent.cs index 2beb1ee7df0..700bdfe75c7 100644 --- a/dotnet/sample/AutoGen.SemanticKernel.Sample/Use_Kernel_Functions_With_Other_Agent.cs +++ b/dotnet/sample/AutoGen.SemanticKernel.Sample/Use_Kernel_Functions_With_Other_Agent.cs @@ -5,8 +5,8 @@ using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; using Microsoft.SemanticKernel; +using OpenAI; #endregion Using namespace AutoGen.SemanticKernel.Sample; @@ -17,7 +17,7 @@ public static async Task RunAsync() { #region Create_plugin var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new Exception("Please set OPENAI_API_KEY environment variable."); - var modelId = "gpt-3.5-turbo"; + var modelId = "gpt-4o-mini"; var kernelBuilder = Kernel.CreateBuilder(); var kernel = kernelBuilder.Build(); var getWeatherFunction = KernelFunctionFactory.CreateFromMethod( @@ -33,9 +33,8 @@ public static async Task RunAsync() var openAIClient = new OpenAIClient(openAIKey); var openAIAgent = new OpenAIChatAgent( - openAIClient: openAIClient, - name: "assistant", - modelName: modelId) + chatClient: openAIClient.GetChatClient(modelId), + name: "assistant") .RegisterMessageConnector() // register message connector so it support AutoGen built-in message types like TextMessage. 
.RegisterMiddleware(kernelPluginMiddleware) // register the middleware to handle the plugin functions .RegisterPrintMessage(); // pretty print the message to the console diff --git a/dotnet/src/AutoGen.Anthropic/Agent/AnthropicClientAgent.cs b/dotnet/src/AutoGen.Anthropic/Agent/AnthropicClientAgent.cs index 73510baeb71..81fa8e6438a 100644 --- a/dotnet/src/AutoGen.Anthropic/Agent/AnthropicClientAgent.cs +++ b/dotnet/src/AutoGen.Anthropic/Agent/AnthropicClientAgent.cs @@ -64,7 +64,7 @@ private ChatCompletionRequest CreateParameters(IEnumerable<IMessage> messages, G { var chatCompletionRequest = new ChatCompletionRequest() { - SystemMessage = _systemMessage, + SystemMessage = [new SystemMessage { Text = _systemMessage }], MaxTokens = options?.MaxToken ?? _maxTokens, Model = _modelName, Stream = shouldStream, diff --git a/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs b/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs index c58b2c1952e..f106e08d35c 100644 --- a/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs +++ b/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs @@ -24,12 +24,13 @@ public sealed class AnthropicClient : IDisposable private static readonly JsonSerializerOptions JsonSerializerOptions = new() { DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, - Converters = { new ContentBaseConverter(), new JsonPropertyNameEnumConverter<ToolChoiceType>() } - }; - - private static readonly JsonSerializerOptions JsonDeserializerOptions = new() - { - Converters = { new ContentBaseConverter(), new JsonPropertyNameEnumConverter<ToolChoiceType>() } + Converters = + { + new ContentBaseConverter(), + new JsonPropertyNameEnumConverter<ToolChoiceType>(), + new JsonPropertyNameEnumConverter<CacheControlType>(), + new SystemMessageConverter(), + } }; public AnthropicClient(HttpClient httpClient, string baseUrl, string apiKey) @@ -135,12 +136,13 @@ private Task<HttpResponseMessage> SendRequestAsync<T>(T requestObject, Cancellat var httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, _baseUrl); var jsonRequest = JsonSerializer.Serialize(requestObject, JsonSerializerOptions); httpRequestMessage.Content = new StringContent(jsonRequest, Encoding.UTF8, "application/json"); + httpRequestMessage.Headers.Add("anthropic-beta", "prompt-caching-2024-07-31"); return _httpClient.SendAsync(httpRequestMessage, cancellationToken); } private async Task<T> DeserializeResponseAsync<T>(Stream responseStream, CancellationToken cancellationToken) { - return await JsonSerializer.DeserializeAsync<T>(responseStream, JsonDeserializerOptions, cancellationToken) + return await JsonSerializer.DeserializeAsync<T>(responseStream, JsonSerializerOptions, cancellationToken) ?? throw new Exception("Failed to deserialize response"); } diff --git a/dotnet/src/AutoGen.Anthropic/Converters/SystemMessageConverter.cs b/dotnet/src/AutoGen.Anthropic/Converters/SystemMessageConverter.cs new file mode 100644 index 00000000000..5bbe8a3a37f --- /dev/null +++ b/dotnet/src/AutoGen.Anthropic/Converters/SystemMessageConverter.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// SystemMessageConverter.cs + +using System; +using System.Text.Json; +using System.Text.Json.Serialization; +using AutoGen.Anthropic.DTO; + +namespace AutoGen.Anthropic.Converters; + +public class SystemMessageConverter : JsonConverter<object> +{ + public override object Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + if (reader.TokenType == JsonTokenType.String) + { + return reader.GetString() ?? string.Empty; + } + if (reader.TokenType == JsonTokenType.StartArray) + { + return JsonSerializer.Deserialize<SystemMessage[]>(ref reader, options) ?? throw new InvalidOperationException(); + } + + throw new JsonException(); + } + + public override void Write(Utf8JsonWriter writer, object value, JsonSerializerOptions options) + { + if (value is string stringValue) + { + writer.WriteStringValue(stringValue); + } + else if (value is SystemMessage[] arrayValue) + { + JsonSerializer.Serialize(writer, arrayValue, options); + } + else + { + throw new JsonException(); + } + } +} diff --git a/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs b/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs index 463ee7fc259..dfb86ef0af5 100644 --- a/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs +++ b/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs @@ -14,7 +14,7 @@ public class ChatCompletionRequest public List<ChatMessage> Messages { get; set; } [JsonPropertyName("system")] - public string? SystemMessage { get; set; } + public SystemMessage[]? SystemMessage { get; set; } [JsonPropertyName("max_tokens")] public int MaxTokens { get; set; } @@ -49,6 +49,26 @@ public ChatCompletionRequest() } } +public class SystemMessage +{ + [JsonPropertyName("text")] + public string? Text { get; set; } + + [JsonPropertyName("type")] + public string? Type { get; private set; } = "text"; + + [JsonPropertyName("cache_control")] + public CacheControl? CacheControl { get; set; } + + public static SystemMessage CreateSystemMessage(string systemMessage) => new() { Text = systemMessage }; + + public static SystemMessage CreateSystemMessageWithCacheControl(string systemMessage) => new() + { + Text = systemMessage, + CacheControl = new CacheControl { Type = CacheControlType.Ephemeral } + }; +} + public class ChatMessage { [JsonPropertyName("role")] diff --git a/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionResponse.cs b/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionResponse.cs index fc33aa0e26b..a142f2feacc 100644 --- a/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionResponse.cs +++ b/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionResponse.cs @@ -70,6 +70,12 @@ public class Usage [JsonPropertyName("output_tokens")] public int OutputTokens { get; set; } + + [JsonPropertyName("cache_creation_input_tokens")] + public int CacheCreationInputTokens { get; set; } + + [JsonPropertyName("cache_read_input_tokens")] + public int CacheReadInputTokens { get; set; } } public class Delta diff --git a/dotnet/src/AutoGen.Anthropic/DTO/Content.cs b/dotnet/src/AutoGen.Anthropic/DTO/Content.cs index 353cf6ae824..ade913b827c 100644 --- a/dotnet/src/AutoGen.Anthropic/DTO/Content.cs +++ b/dotnet/src/AutoGen.Anthropic/DTO/Content.cs @@ -3,6 +3,7 @@ using System.Text.Json.Nodes; using System.Text.Json.Serialization; +using AutoGen.Anthropic.Converters; namespace AutoGen.Anthropic.DTO; @@ -10,6 +11,9 @@ public abstract class ContentBase { [JsonPropertyName("type")] public abstract string Type { get; } + + [JsonPropertyName("cache_control")] + public CacheControl? 
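// [editor's note — illustrative sketch, not part of this patch]
// Taken together, the SystemMessage/CacheControl additions above enable Anthropic prompt
// caching when a request is built by hand. A minimal request using only the DTOs in this
// patch might look like this (longSystemPrompt and messages are placeholders):
//
//   var request = new ChatCompletionRequest
//   {
//       Model = AnthropicConstants.Claude35Sonnet,   // constant added later in this patch
//       MaxTokens = 1024,
//       SystemMessage = [SystemMessage.CreateSystemMessageWithCacheControl(longSystemPrompt)],
//       Messages = messages,
//   };
//
// The "anthropic-beta: prompt-caching-2024-07-31" header added in AnthropicClient opts
// every request into the caching beta; cache hits are reported via the new
// CacheCreationInputTokens / CacheReadInputTokens usage fields.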
CacheControl { get; set; } } public class TextContent : ContentBase @@ -19,6 +23,12 @@ public class TextContent : ContentBase [JsonPropertyName("text")] public string? Text { get; set; } + + public static TextContent CreateTextWithCacheControl(string text) => new() + { + Text = text, + CacheControl = new CacheControl { Type = CacheControlType.Ephemeral } + }; } public class ImageContent : ContentBase @@ -68,3 +78,18 @@ public class ToolResultContent : ContentBase [JsonPropertyName("content")] public string? Content { get; set; } } + +public class CacheControl +{ + [JsonPropertyName("type")] + public CacheControlType Type { get; set; } + + public static CacheControl Create() => new CacheControl { Type = CacheControlType.Ephemeral }; +} + +[JsonConverter(typeof(JsonPropertyNameEnumConverter<CacheControlType>))] +public enum CacheControlType +{ + [JsonPropertyName("ephemeral")] + Ephemeral +} diff --git a/dotnet/src/AutoGen.Anthropic/DTO/Tool.cs b/dotnet/src/AutoGen.Anthropic/DTO/Tool.cs index 2a46bc42a35..3845c444592 100644 --- a/dotnet/src/AutoGen.Anthropic/DTO/Tool.cs +++ b/dotnet/src/AutoGen.Anthropic/DTO/Tool.cs @@ -16,6 +16,9 @@ public class Tool [JsonPropertyName("input_schema")] public InputSchema? InputSchema { get; set; } + + [JsonPropertyName("cache_control")] + public CacheControl? CacheControl { get; set; } } public class InputSchema diff --git a/dotnet/src/AutoGen.Anthropic/Utils/AnthropicConstants.cs b/dotnet/src/AutoGen.Anthropic/Utils/AnthropicConstants.cs index 6fd70cb4ee3..494a6686f52 100644 --- a/dotnet/src/AutoGen.Anthropic/Utils/AnthropicConstants.cs +++ b/dotnet/src/AutoGen.Anthropic/Utils/AnthropicConstants.cs @@ -11,4 +11,5 @@ public static class AnthropicConstants public static string Claude3Opus = "claude-3-opus-20240229"; public static string Claude3Sonnet = "claude-3-sonnet-20240229"; public static string Claude3Haiku = "claude-3-haiku-20240307"; + public static string Claude35Sonnet = "claude-3-5-sonnet-20240620"; } diff --git a/dotnet/src/AutoGen.AzureAIInference/Agent/ChatCompletionsClientAgent.cs b/dotnet/src/AutoGen.AzureAIInference/Agent/ChatCompletionsClientAgent.cs new file mode 100644 index 00000000000..452c5b1c307 --- /dev/null +++ b/dotnet/src/AutoGen.AzureAIInference/Agent/ChatCompletionsClientAgent.cs @@ -0,0 +1,202 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ChatCompletionsClientAgent.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using AutoGen.AzureAIInference.Extension; +using AutoGen.Core; +using Azure.AI.Inference; + +namespace AutoGen.AzureAIInference; + +/// <summary> +/// ChatCompletions client agent. This agent is a thin wrapper around <see cref="ChatCompletionsClient"/> to provide a simple interface for chat completions. +/// <para><see cref="ChatCompletionsClientAgent" /> supports the following message types:</para> +/// <list type="bullet"> +/// <item> +/// <see cref="MessageEnvelope{T}"/> where T is <see cref="ChatRequestMessage"/>: chat request message. +/// </item> +/// </list> +/// <para><see cref="ChatCompletionsClientAgent" /> returns the following message types:</para> +/// <list type="bullet"> +/// <item> +/// <see cref="MessageEnvelope{T}"/> where T is <see cref="ChatResponseMessage"/>: chat response message. +/// <see cref="MessageEnvelope{T}"/> where T is <see cref="StreamingChatCompletionsUpdate"/>: streaming chat completions update. 
+/// </item> +/// </list> +/// </summary> +public class ChatCompletionsClientAgent : IStreamingAgent +{ + private readonly ChatCompletionsClient chatCompletionsClient; + private readonly ChatCompletionsOptions options; + private readonly string systemMessage; + + /// <summary> + /// Create a new instance of <see cref="ChatCompletionsClientAgent"/>. + /// </summary> + /// <param name="chatCompletionsClient">chat completions client</param> + /// <param name="name">agent name</param> + /// <param name="modelName">model name. e.g. gpt-turbo-3.5</param> + /// <param name="systemMessage">system message</param> + /// <param name="temperature">temperature</param> + /// <param name="maxTokens">max tokens to generated</param> + /// <param name="responseFormat">response format, set it to <see cref="ChatCompletionsResponseFormatJSON"/> to enable json mode.</param> + /// <param name="seed">seed to use, set it to enable deterministic output</param> + /// <param name="functions">functions</param> + public ChatCompletionsClientAgent( + ChatCompletionsClient chatCompletionsClient, + string name, + string modelName, + string systemMessage = "You are a helpful AI assistant", + float temperature = 0.7f, + int maxTokens = 1024, + int? seed = null, + ChatCompletionsResponseFormat? responseFormat = null, + IEnumerable<FunctionDefinition>? functions = null) + : this( + chatCompletionsClient: chatCompletionsClient, + name: name, + options: CreateChatCompletionOptions(modelName, temperature, maxTokens, seed, responseFormat, functions), + systemMessage: systemMessage) + { + } + + /// <summary> + /// Create a new instance of <see cref="ChatCompletionsClientAgent"/>. + /// </summary> + /// <param name="chatCompletionsClient">chat completions client</param> + /// <param name="name">agent name</param> + /// <param name="systemMessage">system message</param> + /// <param name="options">chat completion option. The option can't contain messages</param> + public ChatCompletionsClientAgent( + ChatCompletionsClient chatCompletionsClient, + string name, + ChatCompletionsOptions options, + string systemMessage = "You are a helpful AI assistant") + { + if (options.Messages is { Count: > 0 }) + { + throw new ArgumentException("Messages should not be provided in options"); + } + + this.chatCompletionsClient = chatCompletionsClient; + this.Name = name; + this.options = options; + this.systemMessage = systemMessage; + } + + public string Name { get; } + + public async Task<IMessage> GenerateReplyAsync( + IEnumerable<IMessage> messages, + GenerateReplyOptions? options = null, + CancellationToken cancellationToken = default) + { + var settings = this.CreateChatCompletionsOptions(options, messages); + var reply = await this.chatCompletionsClient.CompleteAsync(settings, cancellationToken: cancellationToken); + + return new MessageEnvelope<ChatCompletions>(reply, from: this.Name); + } + + public async IAsyncEnumerable<IMessage> GenerateStreamingReplyAsync( + IEnumerable<IMessage> messages, + GenerateReplyOptions? 
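// [editor's note — illustrative sketch, not part of this patch]
// A minimal way to stand up the new Azure AI Inference agent, assuming a
// ChatCompletionsClient built from an endpoint and an AzureKeyCredential (the client
// construction is an assumption; the agent and extension APIs are the ones this patch adds):
//
//   var client = new ChatCompletionsClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
//   var agent = new ChatCompletionsClientAgent(client, name: "assistant", modelName: "gpt-4o-mini")
//       .RegisterMessageConnector()   // AzureAIInferenceChatRequestMessageConnector, defined below
//       .RegisterPrintMessage();
//
//   await agent.SendAsync("Hello");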
options = null, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var settings = this.CreateChatCompletionsOptions(options, messages); + var response = await this.chatCompletionsClient.CompleteStreamingAsync(settings, cancellationToken); + await foreach (var update in response.WithCancellation(cancellationToken)) + { + yield return new MessageEnvelope<StreamingChatCompletionsUpdate>(update, from: this.Name); + } + } + + private ChatCompletionsOptions CreateChatCompletionsOptions(GenerateReplyOptions? options, IEnumerable<IMessage> messages) + { + var oaiMessages = messages.Select(m => m switch + { + IMessage<ChatRequestMessage> chatRequestMessage => chatRequestMessage.Content, + _ => throw new ArgumentException("Invalid message type") + }); + + // add system message if there's no system message in messages + if (!oaiMessages.Any(m => m is ChatRequestSystemMessage)) + { + oaiMessages = new[] { new ChatRequestSystemMessage(systemMessage) }.Concat(oaiMessages); + } + + // clone the options by serializing and deserializing + var json = JsonSerializer.Serialize(this.options); + var settings = JsonSerializer.Deserialize<ChatCompletionsOptions>(json) ?? throw new InvalidOperationException("Failed to clone options"); + + foreach (var m in oaiMessages) + { + settings.Messages.Add(m); + } + + settings.Temperature = options?.Temperature ?? settings.Temperature; + settings.MaxTokens = options?.MaxToken ?? settings.MaxTokens; + + foreach (var functions in this.options.Tools) + { + settings.Tools.Add(functions); + } + + foreach (var stopSequence in this.options.StopSequences) + { + settings.StopSequences.Add(stopSequence); + } + + var openAIFunctionDefinitions = options?.Functions?.Select(f => f.ToAzureAIInferenceFunctionDefinition()).ToList(); + if (openAIFunctionDefinitions is { Count: > 0 }) + { + foreach (var f in openAIFunctionDefinitions) + { + settings.Tools.Add(new ChatCompletionsFunctionToolDefinition(f)); + } + } + + if (options?.StopSequence is var sequence && sequence is { Length: > 0 }) + { + foreach (var seq in sequence) + { + settings.StopSequences.Add(seq); + } + } + + return settings; + } + + private static ChatCompletionsOptions CreateChatCompletionOptions( + string modelName, + float temperature = 0.7f, + int maxTokens = 1024, + int? seed = null, + ChatCompletionsResponseFormat? responseFormat = null, + IEnumerable<FunctionDefinition>? functions = null) + { + var options = new ChatCompletionsOptions() + { + Model = modelName, + Temperature = temperature, + MaxTokens = maxTokens, + Seed = seed, + ResponseFormat = responseFormat, + }; + + if (functions is not null) + { + foreach (var f in functions) + { + options.Tools.Add(new ChatCompletionsFunctionToolDefinition(f)); + } + } + + return options; + } +} diff --git a/dotnet/src/AutoGen.AzureAIInference/AutoGen.AzureAIInference.csproj b/dotnet/src/AutoGen.AzureAIInference/AutoGen.AzureAIInference.csproj new file mode 100644 index 00000000000..e9401bc4bc2 --- /dev/null +++ b/dotnet/src/AutoGen.AzureAIInference/AutoGen.AzureAIInference.csproj @@ -0,0 +1,25 @@ +<Project Sdk="Microsoft.NET.Sdk"> + <PropertyGroup> + <TargetFrameworks>$(PackageTargetFrameworks)</TargetFrameworks> + <RootNamespace>AutoGen.AzureAIInference</RootNamespace> + </PropertyGroup> + + <Import Project="$(RepoRoot)/nuget/nuget-package.props" /> + + <PropertyGroup> + <!-- NuGet Package Settings --> + <Title>AutoGen.AzureAIInference + + Azure AI Inference Intergration for AutoGen. 
+ + + + + + + + + + + + diff --git a/dotnet/src/AutoGen.AzureAIInference/Extension/ChatComptionClientAgentExtension.cs b/dotnet/src/AutoGen.AzureAIInference/Extension/ChatComptionClientAgentExtension.cs new file mode 100644 index 00000000000..8faf29604ed --- /dev/null +++ b/dotnet/src/AutoGen.AzureAIInference/Extension/ChatComptionClientAgentExtension.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ChatComptionClientAgentExtension.cs + +using AutoGen.Core; + +namespace AutoGen.AzureAIInference.Extension; + +public static class ChatComptionClientAgentExtension +{ + /// + /// Register an to the + /// + /// the connector to use. If null, a new instance of will be created. + public static MiddlewareStreamingAgent RegisterMessageConnector( + this ChatCompletionsClientAgent agent, AzureAIInferenceChatRequestMessageConnector? connector = null) + { + if (connector == null) + { + connector = new AzureAIInferenceChatRequestMessageConnector(); + } + + return agent.RegisterStreamingMiddleware(connector); + } + + /// + /// Register an to the where T is + /// + /// the connector to use. If null, a new instance of will be created. + public static MiddlewareStreamingAgent RegisterMessageConnector( + this MiddlewareStreamingAgent agent, AzureAIInferenceChatRequestMessageConnector? connector = null) + { + if (connector == null) + { + connector = new AzureAIInferenceChatRequestMessageConnector(); + } + + return agent.RegisterStreamingMiddleware(connector); + } +} diff --git a/dotnet/src/AutoGen.AzureAIInference/Extension/FunctionContractExtension.cs b/dotnet/src/AutoGen.AzureAIInference/Extension/FunctionContractExtension.cs new file mode 100644 index 00000000000..4cd7b3864f9 --- /dev/null +++ b/dotnet/src/AutoGen.AzureAIInference/Extension/FunctionContractExtension.cs @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// FunctionContractExtension.cs + +using System; +using System.Collections.Generic; +using AutoGen.Core; +using Azure.AI.Inference; +using Json.Schema; +using Json.Schema.Generation; + +namespace AutoGen.AzureAIInference.Extension; + +public static class FunctionContractExtension +{ + /// + /// Convert a to a that can be used in gpt funciton call. + /// + /// function contract + /// + public static FunctionDefinition ToAzureAIInferenceFunctionDefinition(this FunctionContract functionContract) + { + var functionDefinition = new FunctionDefinition + { + Name = functionContract.Name, + Description = functionContract.Description, + }; + var requiredParameterNames = new List(); + var propertiesSchemas = new Dictionary(); + var propertySchemaBuilder = new JsonSchemaBuilder().Type(SchemaValueType.Object); + foreach (var param in functionContract.Parameters ?? []) + { + if (param.Name is null) + { + throw new InvalidOperationException("Parameter name cannot be null"); + } + + var schemaBuilder = new JsonSchemaBuilder().FromType(param.ParameterType ?? 
throw new ArgumentNullException(nameof(param.ParameterType))); + if (param.Description != null) + { + schemaBuilder = schemaBuilder.Description(param.Description); + } + + if (param.IsRequired) + { + requiredParameterNames.Add(param.Name); + } + + var schema = schemaBuilder.Build(); + propertiesSchemas[param.Name] = schema; + + } + propertySchemaBuilder = propertySchemaBuilder.Properties(propertiesSchemas); + propertySchemaBuilder = propertySchemaBuilder.Required(requiredParameterNames); + + var option = new System.Text.Json.JsonSerializerOptions() + { + PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase + }; + + functionDefinition.Parameters = BinaryData.FromObjectAsJson(propertySchemaBuilder.Build(), option); + + return functionDefinition; + } +} diff --git a/dotnet/src/AutoGen.AzureAIInference/Middleware/AzureAIInferenceChatRequestMessageConnector.cs b/dotnet/src/AutoGen.AzureAIInference/Middleware/AzureAIInferenceChatRequestMessageConnector.cs new file mode 100644 index 00000000000..9c5d22e2e7e --- /dev/null +++ b/dotnet/src/AutoGen.AzureAIInference/Middleware/AzureAIInferenceChatRequestMessageConnector.cs @@ -0,0 +1,302 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// AzureAIInferenceChatRequestMessageConnector.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using AutoGen.Core; +using Azure.AI.Inference; + +namespace AutoGen.AzureAIInference; + +/// +/// This middleware converts the incoming to where T is before sending to agent. And converts the output to after receiving from agent. +/// Supported are +/// - +/// - +/// - +/// - +/// - +/// - where T is +/// - where TMessage1 is and TMessage2 is +/// +public class AzureAIInferenceChatRequestMessageConnector : IStreamingMiddleware +{ + private bool strictMode = false; + + /// + /// Create a new instance of . + /// + /// If true, will throw an + /// When the message type is not supported. If false, it will ignore the unsupported message type. + public AzureAIInferenceChatRequestMessageConnector(bool strictMode = false) + { + this.strictMode = strictMode; + } + + public string? Name => nameof(AzureAIInferenceChatRequestMessageConnector); + + public async Task InvokeAsync(MiddlewareContext context, IAgent agent, CancellationToken cancellationToken = default) + { + var chatMessages = ProcessIncomingMessages(agent, context.Messages); + + var reply = await agent.GenerateReplyAsync(chatMessages, context.Options, cancellationToken); + + return PostProcessMessage(reply); + } + + public async IAsyncEnumerable InvokeAsync( + MiddlewareContext context, + IStreamingAgent agent, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var chatMessages = ProcessIncomingMessages(agent, context.Messages); + var streamingReply = agent.GenerateStreamingReplyAsync(chatMessages, context.Options, cancellationToken); + string? 
currentToolName = null; + await foreach (var reply in streamingReply) + { + if (reply is IMessage update) + { + if (update.Content.FunctionName is string functionName) + { + currentToolName = functionName; + } + else if (update.Content.ToolCallUpdate is StreamingFunctionToolCallUpdate toolCallUpdate && toolCallUpdate.Name is string toolCallName) + { + currentToolName = toolCallName; + } + var postProcessMessage = PostProcessStreamingMessage(update, currentToolName); + if (postProcessMessage != null) + { + yield return postProcessMessage; + } + } + else + { + if (this.strictMode) + { + throw new InvalidOperationException($"Invalid streaming message type {reply.GetType().Name}"); + } + else + { + yield return reply; + } + } + } + } + + public IMessage PostProcessMessage(IMessage message) + { + return message switch + { + IMessage m => PostProcessChatResponseMessage(m.Content, m.From), + IMessage m => PostProcessChatCompletions(m), + _ when strictMode is false => message, + _ => throw new InvalidOperationException($"Invalid return message type {message.GetType().Name}"), + }; + } + + public IMessage? PostProcessStreamingMessage(IMessage update, string? currentToolName) + { + if (update.Content.ContentUpdate is string contentUpdate && string.IsNullOrEmpty(contentUpdate) == false) + { + // text message + return new TextMessageUpdate(Role.Assistant, contentUpdate, from: update.From); + } + else if (update.Content.FunctionName is string functionName) + { + return new ToolCallMessageUpdate(functionName, string.Empty, from: update.From); + } + else if (update.Content.FunctionArgumentsUpdate is string functionArgumentsUpdate && currentToolName is string) + { + return new ToolCallMessageUpdate(currentToolName, functionArgumentsUpdate, from: update.From); + } + else if (update.Content.ToolCallUpdate is StreamingFunctionToolCallUpdate tooCallUpdate && currentToolName is string) + { + return new ToolCallMessageUpdate(tooCallUpdate.Name ?? currentToolName, tooCallUpdate.ArgumentsUpdate, from: update.From); + } + else + { + return null; + } + } + + private IMessage PostProcessChatCompletions(IMessage message) + { + // throw exception if prompt filter results is not null + if (message.Content.Choices[0].FinishReason == CompletionsFinishReason.ContentFiltered) + { + throw new InvalidOperationException("The content is filtered because its potential risk. Please try another input."); + } + + return PostProcessChatResponseMessage(message.Content.Choices[0].Message, message.From); + } + + private IMessage PostProcessChatResponseMessage(ChatResponseMessage chatResponseMessage, string? 
from) + { + var textContent = chatResponseMessage.Content; + if (chatResponseMessage.ToolCalls.Where(tc => tc is ChatCompletionsFunctionToolCall).Any()) + { + var functionToolCalls = chatResponseMessage.ToolCalls + .Where(tc => tc is ChatCompletionsFunctionToolCall) + .Select(tc => (ChatCompletionsFunctionToolCall)tc); + + var toolCalls = functionToolCalls.Select(tc => new ToolCall(tc.Name, tc.Arguments) { ToolCallId = tc.Id }); + + return new ToolCallMessage(toolCalls, from) + { + Content = textContent, + }; + } + + if (textContent is string content && !string.IsNullOrEmpty(content)) + { + return new TextMessage(Role.Assistant, content, from); + } + + throw new InvalidOperationException("Invalid ChatResponseMessage"); + } + + public IEnumerable ProcessIncomingMessages(IAgent agent, IEnumerable messages) + { + return messages.SelectMany(m => + { + if (m is IMessage crm) + { + return [crm]; + } + else + { + var chatRequestMessages = m switch + { + TextMessage textMessage => ProcessTextMessage(agent, textMessage), + ImageMessage imageMessage when (imageMessage.From is null || imageMessage.From != agent.Name) => ProcessImageMessage(agent, imageMessage), + MultiModalMessage multiModalMessage when (multiModalMessage.From is null || multiModalMessage.From != agent.Name) => ProcessMultiModalMessage(agent, multiModalMessage), + ToolCallMessage toolCallMessage when (toolCallMessage.From is null || toolCallMessage.From == agent.Name) => ProcessToolCallMessage(agent, toolCallMessage), + ToolCallResultMessage toolCallResultMessage => ProcessToolCallResultMessage(toolCallResultMessage), + AggregateMessage aggregateMessage => ProcessFunctionCallMiddlewareMessage(agent, aggregateMessage), + _ when strictMode is false => [], + _ => throw new InvalidOperationException($"Invalid message type: {m.GetType().Name}"), + }; + + if (chatRequestMessages.Any()) + { + return chatRequestMessages.Select(cm => MessageEnvelope.Create(cm, m.From)); + } + else + { + return [m]; + } + } + }); + } + + private IEnumerable ProcessTextMessage(IAgent agent, TextMessage message) + { + if (message.Role == Role.System) + { + return [new ChatRequestSystemMessage(message.Content)]; + } + + if (agent.Name == message.From) + { + return [new ChatRequestAssistantMessage { Content = message.Content }]; + } + else + { + return message.From switch + { + null when message.Role == Role.User => [new ChatRequestUserMessage(message.Content)], + null when message.Role == Role.Assistant => [new ChatRequestAssistantMessage() { Content = message.Content }], + null => throw new InvalidOperationException("Invalid Role"), + _ => [new ChatRequestUserMessage(message.Content)] + }; + } + } + + private IEnumerable ProcessImageMessage(IAgent agent, ImageMessage message) + { + if (agent.Name == message.From) + { + // image message from assistant is not supported + throw new ArgumentException("ImageMessage is not supported when message.From is the same with agent"); + } + + var imageContentItem = this.CreateChatMessageImageContentItemFromImageMessage(message); + return [new ChatRequestUserMessage([imageContentItem])]; + } + + private IEnumerable ProcessMultiModalMessage(IAgent agent, MultiModalMessage message) + { + if (agent.Name == message.From) + { + // image message from assistant is not supported + throw new ArgumentException("MultiModalMessage is not supported when message.From is the same with agent"); + } + + IEnumerable items = message.Content.Select(ci => ci switch + { + TextMessage text => new ChatMessageTextContentItem(text.Content), + 
ImageMessage image => this.CreateChatMessageImageContentItemFromImageMessage(image), + _ => throw new NotImplementedException(), + }); + + return [new ChatRequestUserMessage(items)]; + } + + private ChatMessageImageContentItem CreateChatMessageImageContentItemFromImageMessage(ImageMessage message) + { + return message.Data is null && message.Url is not null + ? new ChatMessageImageContentItem(new Uri(message.Url)) + : new ChatMessageImageContentItem(message.Data, message.Data?.MediaType); + } + + private IEnumerable ProcessToolCallMessage(IAgent agent, ToolCallMessage message) + { + if (message.From is not null && message.From != agent.Name) + { + throw new ArgumentException("ToolCallMessage is not supported when message.From is not the same with agent"); + } + + var toolCall = message.ToolCalls.Select((tc, i) => new ChatCompletionsFunctionToolCall(tc.ToolCallId ?? $"{tc.FunctionName}_{i}", tc.FunctionName, tc.FunctionArguments)); + var textContent = message.GetContent() ?? string.Empty; + var chatRequestMessage = new ChatRequestAssistantMessage() { Content = textContent }; + foreach (var tc in toolCall) + { + chatRequestMessage.ToolCalls.Add(tc); + } + + return [chatRequestMessage]; + } + + private IEnumerable ProcessToolCallResultMessage(ToolCallResultMessage message) + { + return message.ToolCalls + .Where(tc => tc.Result is not null) + .Select((tc, i) => new ChatRequestToolMessage(tc.Result, tc.ToolCallId ?? $"{tc.FunctionName}_{i}")); + } + + private IEnumerable ProcessFunctionCallMiddlewareMessage(IAgent agent, AggregateMessage aggregateMessage) + { + if (aggregateMessage.From is not null && aggregateMessage.From != agent.Name) + { + // convert as user message + var resultMessage = aggregateMessage.Message2; + + return resultMessage.ToolCalls.Select(tc => new ChatRequestUserMessage(tc.Result)); + } + else + { + var toolCallMessage1 = aggregateMessage.Message1; + var toolCallResultMessage = aggregateMessage.Message2; + + var assistantMessage = this.ProcessToolCallMessage(agent, toolCallMessage1); + var toolCallResults = this.ProcessToolCallResultMessage(toolCallResultMessage); + + return assistantMessage.Concat(toolCallResults); + } + } +} diff --git a/dotnet/src/AutoGen.Core/Extension/AgentExtension.cs b/dotnet/src/AutoGen.Core/Extension/AgentExtension.cs index 44ce8838b73..13ce970d551 100644 --- a/dotnet/src/AutoGen.Core/Extension/AgentExtension.cs +++ b/dotnet/src/AutoGen.Core/Extension/AgentExtension.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // AgentExtension.cs +using System; using System.Collections.Generic; using System.Linq; using System.Threading; @@ -60,14 +61,14 @@ public static async Task SendAsync( } /// - /// Send message to another agent. + /// Send message to another agent and iterate over the responses. /// /// sender agent. /// receiver agent. /// chat history. /// max conversation round. 
/// conversation history - public static async Task> SendAsync( + public static IAsyncEnumerable SendAsync( this IAgent agent, IAgent receiver, IEnumerable chatHistory, @@ -78,21 +79,21 @@ public static async Task> SendAsync( { var gc = manager.GroupChat; - return await agent.SendMessageToGroupAsync(gc, chatHistory, maxRound, ct); + return gc.SendAsync(chatHistory, maxRound, ct); } var groupChat = new RoundRobinGroupChat( - agents: new[] - { + agents: + [ agent, receiver, - }); + ]); - return await groupChat.CallAsync(chatHistory, maxRound, ct: ct); + return groupChat.SendAsync(chatHistory, maxRound, cancellationToken: ct); } /// - /// Send message to another agent. + /// Send message to another agent and iterate over the responses. /// /// sender agent. /// message to send. will be added to the end of if provided @@ -100,7 +101,7 @@ public static async Task> SendAsync( /// chat history. /// max conversation round. /// conversation history - public static async Task> SendAsync( + public static IAsyncEnumerable SendAsync( this IAgent agent, IAgent receiver, string message, @@ -116,11 +117,12 @@ public static async Task> SendAsync( chatHistory = chatHistory ?? new List(); chatHistory = chatHistory.Append(msg); - return await agent.SendAsync(receiver, chatHistory, maxRound, ct); + return agent.SendAsync(receiver, chatHistory, maxRound, ct); } /// - /// Shortcut API to send message to another agent. + /// Shortcut API to send message to another agent and get all responses. + /// To iterate over the responses, use or /// /// sender agent /// receiver agent @@ -144,10 +146,16 @@ public static async Task> InitiateChatAsync( chatHistory.Add(msg); } - return await agent.SendAsync(receiver, chatHistory, maxRound, ct); + await foreach (var msg in agent.SendAsync(receiver, chatHistory, maxRound, ct)) + { + chatHistory.Add(msg); + } + + return chatHistory; } - public static async Task> SendMessageToGroupAsync( + [Obsolete("use GroupChatExtension.SendAsync")] + public static IAsyncEnumerable SendMessageToGroupAsync( this IAgent agent, IGroupChat groupChat, string msg, @@ -159,16 +167,18 @@ public static async Task> SendMessageToGroupAsync( chatHistory = chatHistory ?? Enumerable.Empty(); chatHistory = chatHistory.Append(chatMessage); - return await agent.SendMessageToGroupAsync(groupChat, chatHistory, maxRound, ct); + return agent.SendMessageToGroupAsync(groupChat, chatHistory, maxRound, ct); } - public static async Task> SendMessageToGroupAsync( + [Obsolete("use GroupChatExtension.SendAsync")] + public static IAsyncEnumerable SendMessageToGroupAsync( this IAgent _, IGroupChat groupChat, IEnumerable? chatHistory = null, int maxRound = 10, CancellationToken ct = default) { - return await groupChat.CallAsync(chatHistory, maxRound, ct); + chatHistory = chatHistory ?? 
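// [editor's note — illustrative sketch, not part of this patch]
// With SendAsync now returning an async stream of messages, callers iterate the
// conversation instead of awaiting a completed history, e.g. (alice and bob are
// placeholder agents):
//
//   await foreach (var message in alice.SendAsync(bob, "Hi bob, tell me a joke", maxRound: 4))
//   {
//       Console.WriteLine(message.GetContent());
//   }
//
// InitiateChatAsync (above) preserves the old "return the full history" behaviour by
// draining this stream internally before returning.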
Enumerable.Empty(); + return groupChat.SendAsync(chatHistory, maxRound, ct); } } diff --git a/dotnet/src/AutoGen.DotnetInteractive/AutoGen.DotnetInteractive.csproj b/dotnet/src/AutoGen.DotnetInteractive/AutoGen.DotnetInteractive.csproj index 5778761f05d..e850d94944b 100644 --- a/dotnet/src/AutoGen.DotnetInteractive/AutoGen.DotnetInteractive.csproj +++ b/dotnet/src/AutoGen.DotnetInteractive/AutoGen.DotnetInteractive.csproj @@ -27,9 +27,14 @@ + + + + + - + diff --git a/dotnet/src/AutoGen.DotnetInteractive/DotnetInteractiveKernelBuilder.cs b/dotnet/src/AutoGen.DotnetInteractive/DotnetInteractiveKernelBuilder.cs new file mode 100644 index 00000000000..cc282fbba55 --- /dev/null +++ b/dotnet/src/AutoGen.DotnetInteractive/DotnetInteractiveKernelBuilder.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// DotnetInteractiveKernelBuilder.cs + +namespace AutoGen.DotnetInteractive; + +public static class DotnetInteractiveKernelBuilder +{ + +#if NET8_0_OR_GREATER + public static InProccessDotnetInteractiveKernelBuilder CreateEmptyInProcessKernelBuilder() + { + return new InProccessDotnetInteractiveKernelBuilder(); + } + + + public static InProccessDotnetInteractiveKernelBuilder CreateDefaultInProcessKernelBuilder() + { + return new InProccessDotnetInteractiveKernelBuilder() + .AddCSharpKernel() + .AddFSharpKernel(); + } +#endif + + public static DotnetInteractiveStdioKernelConnector CreateKernelBuilder(string workingDirectory, string kernelName = "root-proxy") + { + return new DotnetInteractiveStdioKernelConnector(workingDirectory, kernelName); + } +} diff --git a/dotnet/src/AutoGen.DotnetInteractive/DotnetInteractiveStdioKernelConnector.cs b/dotnet/src/AutoGen.DotnetInteractive/DotnetInteractiveStdioKernelConnector.cs new file mode 100644 index 00000000000..a3ea80a7b12 --- /dev/null +++ b/dotnet/src/AutoGen.DotnetInteractive/DotnetInteractiveStdioKernelConnector.cs @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// DotnetInteractiveStdioKernelConnector.cs + +using AutoGen.DotnetInteractive.Extension; +using Microsoft.DotNet.Interactive; +using Microsoft.DotNet.Interactive.Commands; +using Microsoft.DotNet.Interactive.Connection; + +namespace AutoGen.DotnetInteractive; + +public class DotnetInteractiveStdioKernelConnector +{ + private string workingDirectory; + private InteractiveService interactiveService; + private string kernelName; + private List setupCommands = new List(); + + internal DotnetInteractiveStdioKernelConnector(string workingDirectory, string kernelName = "root-proxy") + { + this.workingDirectory = workingDirectory; + this.interactiveService = new InteractiveService(workingDirectory); + this.kernelName = kernelName; + } + + public DotnetInteractiveStdioKernelConnector RestoreDotnetInteractive() + { + if (this.interactiveService.RestoreDotnetInteractive()) + { + return this; + } + else + { + throw new Exception("Failed to restore dotnet interactive tool."); + } + } + + public DotnetInteractiveStdioKernelConnector AddPythonKernel( + string venv, + string kernelName = "python") + { + var magicCommand = $"#!connect jupyter --kernel-name {kernelName} --kernel-spec {venv}"; + var connectCommand = new SubmitCode(magicCommand); + + this.setupCommands.Add(connectCommand); + + return this; + } + + public async Task BuildAsync(CancellationToken ct = default) + { + var compositeKernel = new CompositeKernel(); + var url = KernelHost.CreateHostUri(this.kernelName); + var cmd = new string[] + { + "dotnet", + "tool", + "run", + "dotnet-interactive", + $"[cb-{this.kernelName}]", + "stdio", + //"--default-kernel", + //"csharp", + "--working-dir", + $@"""{workingDirectory}""", + }; + + var connector = new StdIoKernelConnector( + cmd, + this.kernelName, + url, + new DirectoryInfo(this.workingDirectory)); + + var rootProxyKernel = await connector.CreateRootProxyKernelAsync(); + + rootProxyKernel.KernelInfo.SupportedKernelCommands.Add(new(nameof(SubmitCode))); + + var dotnetKernel = await connector.CreateProxyKernelAsync(".NET"); + foreach (var setupCommand in this.setupCommands) + { + var setupCommandResult = await rootProxyKernel.SendAsync(setupCommand, ct); + setupCommandResult.ThrowOnCommandFailed(); + } + + return rootProxyKernel; + } +} diff --git a/dotnet/src/AutoGen.DotnetInteractive/Extension/AgentExtension.cs b/dotnet/src/AutoGen.DotnetInteractive/Extension/AgentExtension.cs index 83955c53fa1..de1e2a68cc0 100644 --- a/dotnet/src/AutoGen.DotnetInteractive/Extension/AgentExtension.cs +++ b/dotnet/src/AutoGen.DotnetInteractive/Extension/AgentExtension.cs @@ -21,6 +21,7 @@ public static class AgentExtension /// [!code-csharp[Example04_Dynamic_GroupChat_Coding_Task](~/../sample/AutoGen.BasicSamples/Example04_Dynamic_GroupChat_Coding_Task.cs)] /// ]]> /// + [Obsolete] public static IAgent RegisterDotnetCodeBlockExectionHook( this IAgent agent, InteractiveService interactiveService, diff --git a/dotnet/src/AutoGen.DotnetInteractive/Utils.cs b/dotnet/src/AutoGen.DotnetInteractive/Extension/KernelExtension.cs similarity index 57% rename from dotnet/src/AutoGen.DotnetInteractive/Utils.cs rename to dotnet/src/AutoGen.DotnetInteractive/Extension/KernelExtension.cs index d10208d508c..2a7afdf8857 100644 --- a/dotnet/src/AutoGen.DotnetInteractive/Utils.cs +++ b/dotnet/src/AutoGen.DotnetInteractive/Extension/KernelExtension.cs @@ -1,23 +1,42 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
-// Utils.cs +// KernelExtension.cs -using System.Collections; -using System.Collections.Immutable; using Microsoft.DotNet.Interactive; using Microsoft.DotNet.Interactive.Commands; using Microsoft.DotNet.Interactive.Connection; using Microsoft.DotNet.Interactive.Events; -public static class ObservableExtensions +namespace AutoGen.DotnetInteractive.Extension; + +public static class KernelExtension { - public static SubscribedList ToSubscribedList(this IObservable source) + public static async Task RunSubmitCodeCommandAsync( + this Kernel kernel, + string codeBlock, + string targetKernelName, + CancellationToken ct = default) { - return new SubscribedList(source); + try + { + var cmd = new SubmitCode(codeBlock, targetKernelName); + var res = await kernel.SendAndThrowOnCommandFailedAsync(cmd, ct); + var events = res.Events; + var displayValues = res.Events.Where(x => x is StandardErrorValueProduced || x is StandardOutputValueProduced || x is ReturnValueProduced || x is DisplayedValueProduced) + .SelectMany(x => (x as DisplayEvent)!.FormattedValues); + + if (displayValues is null || displayValues.Count() == 0) + { + return null; + } + + return string.Join("\n", displayValues.Select(x => x.Value)); + } + catch (Exception ex) + { + return $"Error: {ex.Message}"; + } } -} -public static class KernelExtensions -{ internal static void SetUpValueSharingIfSupported(this ProxyKernel proxyKernel) { var supportedCommands = proxyKernel.KernelInfo.SupportedKernelCommands; @@ -38,7 +57,7 @@ internal static async Task SendAndThrowOnCommandFailedAsync return result; } - private static void ThrowOnCommandFailed(this KernelCommandResult result) + internal static void ThrowOnCommandFailed(this KernelCommandResult result) { var failedEvents = result.Events.OfType(); if (!failedEvents.Any()) @@ -60,27 +79,3 @@ private static void ThrowOnCommandFailed(this KernelCommandResult result) private static Exception GetException(this CommandFailed commandFailedEvent) => new Exception(commandFailedEvent.Message); } - -public class SubscribedList : IReadOnlyList, IDisposable -{ - private ImmutableArray _list = ImmutableArray.Empty; - private readonly IDisposable _subscription; - - public SubscribedList(IObservable source) - { - _subscription = source.Subscribe(x => _list = _list.Add(x)); - } - - public IEnumerator GetEnumerator() - { - return ((IEnumerable)_list).GetEnumerator(); - } - - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - - public int Count => _list.Length; - - public T this[int index] => _list[index]; - - public void Dispose() => _subscription.Dispose(); -} diff --git a/dotnet/src/AutoGen.DotnetInteractive/Extension/MessageExtension.cs b/dotnet/src/AutoGen.DotnetInteractive/Extension/MessageExtension.cs new file mode 100644 index 00000000000..6a8bf66c19f --- /dev/null +++ b/dotnet/src/AutoGen.DotnetInteractive/Extension/MessageExtension.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// MessageExtension.cs + +using System.Text.RegularExpressions; + +namespace AutoGen.DotnetInteractive.Extension; + +public static class MessageExtension +{ + /// + /// Extract a single code block from a message. If the message contains multiple code blocks, only the first one will be returned. + /// + /// + /// code block prefix, e.g. ```csharp + /// code block suffix, e.g. ``` + /// + public static string? 
ExtractCodeBlock( + this IMessage message, + string codeBlockPrefix, + string codeBlockSuffix) + { + foreach (var codeBlock in message.ExtractCodeBlocks(codeBlockPrefix, codeBlockSuffix)) + { + return codeBlock; + } + + return null; + } + + /// + /// Extract all code blocks from a message. + /// + /// + /// code block prefix, e.g. ```csharp + /// code block suffix, e.g. ``` + /// + public static IEnumerable ExtractCodeBlocks( + this IMessage message, + string codeBlockPrefix, + string codeBlockSuffix) + { + var content = message.GetContent() ?? string.Empty; + if (string.IsNullOrWhiteSpace(content)) + { + yield break; + } + + foreach (Match match in Regex.Matches(content, $@"{codeBlockPrefix}([\s\S]*?){codeBlockSuffix}")) + { + yield return match.Groups[1].Value.Trim(); + } + } +} diff --git a/dotnet/src/AutoGen.DotnetInteractive/InProccessDotnetInteractiveKernelBuilder.cs b/dotnet/src/AutoGen.DotnetInteractive/InProccessDotnetInteractiveKernelBuilder.cs new file mode 100644 index 00000000000..6ddd3d6b417 --- /dev/null +++ b/dotnet/src/AutoGen.DotnetInteractive/InProccessDotnetInteractiveKernelBuilder.cs @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// InProccessDotnetInteractiveKernelBuilder.cs + +#if NET8_0_OR_GREATER +using AutoGen.DotnetInteractive.Extension; +using Microsoft.DotNet.Interactive; +using Microsoft.DotNet.Interactive.Commands; +using Microsoft.DotNet.Interactive.CSharp; +using Microsoft.DotNet.Interactive.FSharp; +using Microsoft.DotNet.Interactive.Jupyter; +using Microsoft.DotNet.Interactive.PackageManagement; +using Microsoft.DotNet.Interactive.PowerShell; + +namespace AutoGen.DotnetInteractive; + +/// +/// Build an in-proc dotnet interactive kernel. +/// +public class InProccessDotnetInteractiveKernelBuilder +{ + private readonly CompositeKernel compositeKernel; + + internal InProccessDotnetInteractiveKernelBuilder() + { + this.compositeKernel = new CompositeKernel(); + + // add jupyter connector + this.compositeKernel.AddKernelConnector( + new ConnectJupyterKernelCommand() + .AddConnectionOptions(new JupyterHttpKernelConnectionOptions()) + .AddConnectionOptions(new JupyterLocalKernelConnectionOptions())); + } + + public InProccessDotnetInteractiveKernelBuilder AddCSharpKernel(IEnumerable? aliases = null) + { + aliases ??= ["c#", "C#", "csharp"]; + // create csharp kernel + var csharpKernel = new CSharpKernel() + .UseNugetDirective((k, resolvedPackageReference) => + { + + k.AddAssemblyReferences(resolvedPackageReference + .SelectMany(r => r.AssemblyPaths)); + return Task.CompletedTask; + }) + .UseKernelHelpers() + .UseWho() + .UseMathAndLaTeX() + .UseValueSharing(); + + this.AddKernel(csharpKernel, aliases); + + return this; + } + + public InProccessDotnetInteractiveKernelBuilder AddFSharpKernel(IEnumerable? aliases = null) + { + aliases ??= ["f#", "F#", "fsharp"]; + // create fsharp kernel + var fsharpKernel = new FSharpKernel() + .UseDefaultFormatting() + .UseKernelHelpers() + .UseWho() + .UseMathAndLaTeX() + .UseValueSharing(); + + this.AddKernel(fsharpKernel, aliases); + + return this; + } + + public InProccessDotnetInteractiveKernelBuilder AddPowershellKernel(IEnumerable? 
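// [editor's note — illustrative sketch, not part of this patch]
// The pieces introduced above compose into a simple "extract and run" loop, using only
// APIs defined in this patch (the in-process builder requires .NET 8+; reply is an
// IMessage produced by some agent):
//
//   var kernel = DotnetInteractiveKernelBuilder
//       .CreateDefaultInProcessKernelBuilder()   // adds the C# and F# kernels
//       .Build();
//
//   var code = reply.ExtractCodeBlock("```csharp", "```");
//   if (code is not null)
//   {
//       var output = await kernel.RunSubmitCodeCommandAsync(code, "csharp");
//       Console.WriteLine(output);
//   }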
aliases = null) + { + aliases ??= ["pwsh", "powershell"]; + // create powershell kernel + var powershellKernel = new PowerShellKernel() + .UseProfiles() + .UseValueSharing(); + + this.AddKernel(powershellKernel, aliases); + + return this; + } + + public InProccessDotnetInteractiveKernelBuilder AddPythonKernel(string venv, string kernelName = "python") + { + // create python kernel + var magicCommand = $"#!connect jupyter --kernel-name {kernelName} --kernel-spec {venv}"; + var connectCommand = new SubmitCode(magicCommand); + var result = this.compositeKernel.SendAsync(connectCommand).Result; + + result.ThrowOnCommandFailed(); + + return this; + } + + public CompositeKernel Build() + { + return this.compositeKernel + .UseDefaultMagicCommands() + .UseImportMagicCommand(); + } + + private InProccessDotnetInteractiveKernelBuilder AddKernel(Kernel kernel, IEnumerable? aliases = null) + { + this.compositeKernel.Add(kernel, aliases); + return this; + } +} +#endif diff --git a/dotnet/src/AutoGen.DotnetInteractive/InteractiveService.cs b/dotnet/src/AutoGen.DotnetInteractive/InteractiveService.cs index 3797dfcff64..3381aecf579 100644 --- a/dotnet/src/AutoGen.DotnetInteractive/InteractiveService.cs +++ b/dotnet/src/AutoGen.DotnetInteractive/InteractiveService.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Reactive.Linq; using System.Reflection; +using AutoGen.DotnetInteractive.Extension; using Microsoft.DotNet.Interactive; using Microsoft.DotNet.Interactive.Commands; using Microsoft.DotNet.Interactive.Connection; @@ -21,14 +22,6 @@ public class InteractiveService : IDisposable //private readonly ProcessJobTracker jobTracker = new ProcessJobTracker(); private string? installingDirectory; - public event EventHandler? DisplayEvent; - - public event EventHandler? Output; - - public event EventHandler? CommandFailed; - - public event EventHandler? HoverTextProduced; - /// /// Install dotnet interactive tool to /// and create an instance of . @@ -52,6 +45,8 @@ public InteractiveService(Kernel kernel) this.kernel = kernel; } + public Kernel? 
Kernel => this.kernel; + public async Task StartAsync(string workingDirectory, CancellationToken ct = default) { if (this.kernel != null) @@ -63,31 +58,14 @@ public async Task StartAsync(string workingDirectory, CancellationToken ct return true; } - public async Task SubmitCommandAsync(KernelCommand cmd, CancellationToken ct) + public async Task SubmitCommandAsync(SubmitCode cmd, CancellationToken ct) { if (this.kernel == null) { throw new Exception("Kernel is not running"); } - try - { - var res = await this.kernel.SendAndThrowOnCommandFailedAsync(cmd, ct); - var events = res.Events; - var displayValues = events.Where(x => x is StandardErrorValueProduced || x is StandardOutputValueProduced || x is ReturnValueProduced) - .SelectMany(x => (x as DisplayEvent)!.FormattedValues); - - if (displayValues is null || displayValues.Count() == 0) - { - return null; - } - - return string.Join("\n", displayValues.Select(x => x.Value)); - } - catch (Exception ex) - { - return $"Error: {ex.Message}"; - } + return await this.kernel.RunSubmitCodeCommandAsync(cmd.Code, cmd.TargetKernelName, ct); } public async Task SubmitPowershellCodeAsync(string code, CancellationToken ct) @@ -109,7 +87,6 @@ public bool RestoreDotnetInteractive() throw new Exception("Installing directory is not set"); } - this.WriteLine("Restore dotnet interactive tool"); // write RestoreInteractive.config from embedded resource to this.workingDirectory var assembly = Assembly.GetAssembly(typeof(InteractiveService))!; var resourceName = "AutoGen.DotnetInteractive.RestoreInteractive.config"; @@ -202,8 +179,6 @@ await rootProxyKernel.SendAsync( //compositeKernel.DefaultKernelName = "csharp"; compositeKernel.Add(rootProxyKernel); - compositeKernel.KernelEvents.Subscribe(this.OnKernelDiagnosticEventReceived); - return compositeKernel; } catch (CommandLineInvocationException) when (restoreWhenFail) @@ -219,35 +194,11 @@ await rootProxyKernel.SendAsync( } } - private void OnKernelDiagnosticEventReceived(KernelEvent ke) - { - this.WriteLine("Receive data from kernel"); - this.WriteLine(KernelEventEnvelope.Serialize(ke)); - - switch (ke) - { - case DisplayEvent de: - this.DisplayEvent?.Invoke(this, de); - break; - case CommandFailed cf: - this.CommandFailed?.Invoke(this, cf); - break; - case HoverTextProduced cf: - this.HoverTextProduced?.Invoke(this, cf); - break; - } - } - - private void WriteLine(string data) - { - this.Output?.Invoke(this, data); - } - private void PrintProcessOutput(object sender, DataReceivedEventArgs e) { if (!string.IsNullOrEmpty(e.Data)) { - this.WriteLine(e.Data); + Console.WriteLine(e.Data); } } diff --git a/dotnet/src/AutoGen.LMStudio/AutoGen.LMStudio.csproj b/dotnet/src/AutoGen.LMStudio/AutoGen.LMStudio.csproj index 8725d564df4..aa891e71294 100644 --- a/dotnet/src/AutoGen.LMStudio/AutoGen.LMStudio.csproj +++ b/dotnet/src/AutoGen.LMStudio/AutoGen.LMStudio.csproj @@ -17,7 +17,7 @@ - + diff --git a/dotnet/src/AutoGen.LMStudio/LMStudioAgent.cs b/dotnet/src/AutoGen.LMStudio/LMStudioAgent.cs index c3930abc0de..c4808b443c7 100644 --- a/dotnet/src/AutoGen.LMStudio/LMStudioAgent.cs +++ b/dotnet/src/AutoGen.LMStudio/LMStudioAgent.cs @@ -6,7 +6,7 @@ using System.Net.Http; using System.Threading; using System.Threading.Tasks; -using AutoGen.OpenAI; +using AutoGen.OpenAI.V1; using Azure.AI.OpenAI; using Azure.Core.Pipeline; @@ -18,6 +18,7 @@ namespace AutoGen.LMStudio; /// /// [!code-csharp[LMStudioAgent](../../sample/AutoGen.BasicSamples/Example08_LMStudio.cs?name=lmstudio_example_1)] /// +[Obsolete("Use OpenAIChatAgent to 
connect to LM Studio")] public class LMStudioAgent : IAgent { private readonly GPTAgent innerAgent; diff --git a/dotnet/src/AutoGen.OpenAI/Agent/GPTAgent.cs b/dotnet/src/AutoGen.OpenAI.V1/Agent/GPTAgent.cs similarity index 97% rename from dotnet/src/AutoGen.OpenAI/Agent/GPTAgent.cs rename to dotnet/src/AutoGen.OpenAI.V1/Agent/GPTAgent.cs index 5de481245b7..a32af5c38f1 100644 --- a/dotnet/src/AutoGen.OpenAI/Agent/GPTAgent.cs +++ b/dotnet/src/AutoGen.OpenAI.V1/Agent/GPTAgent.cs @@ -5,10 +5,10 @@ using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; -using AutoGen.OpenAI.Extension; +using AutoGen.OpenAI.V1.Extension; using Azure.AI.OpenAI; -namespace AutoGen.OpenAI; +namespace AutoGen.OpenAI.V1; /// /// GPT agent that can be used to connect to OpenAI chat models like GPT-3.5, GPT-4, etc. @@ -27,6 +27,7 @@ namespace AutoGen.OpenAI; /// - /// - where TMessage1 is and TMessage2 is /// +[Obsolete("Use OpenAIChatAgent instead")] public class GPTAgent : IStreamingAgent { private readonly OpenAIClient openAIClient; diff --git a/dotnet/src/AutoGen.OpenAI.V1/Agent/OpenAIChatAgent.cs b/dotnet/src/AutoGen.OpenAI.V1/Agent/OpenAIChatAgent.cs new file mode 100644 index 00000000000..2305536b4e5 --- /dev/null +++ b/dotnet/src/AutoGen.OpenAI.V1/Agent/OpenAIChatAgent.cs @@ -0,0 +1,206 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// OpenAIChatAgent.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using AutoGen.OpenAI.V1.Extension; +using Azure.AI.OpenAI; + +namespace AutoGen.OpenAI.V1; + +/// +/// OpenAI client agent. This agent is a thin wrapper around to provide a simple interface for chat completions. +/// To better work with other agents, it's recommended to use which supports more message types and have a better compatibility with other agents. +/// supports the following message types: +/// +/// +/// where T is : chat request message. +/// +/// +/// returns the following message types: +/// +/// +/// where T is : chat response message. +/// where T is : streaming chat completions update. +/// +/// +/// +public class OpenAIChatAgent : IStreamingAgent +{ + private readonly OpenAIClient openAIClient; + private readonly ChatCompletionsOptions options; + private readonly string systemMessage; + + /// + /// Create a new instance of . + /// + /// openai client + /// agent name + /// model name. e.g. gpt-turbo-3.5 + /// system message + /// temperature + /// max tokens to generated + /// response format, set it to to enable json mode. + /// seed to use, set it to enable deterministic output + /// functions + public OpenAIChatAgent( + OpenAIClient openAIClient, + string name, + string modelName, + string systemMessage = "You are a helpful AI assistant", + float temperature = 0.7f, + int maxTokens = 1024, + int? seed = null, + ChatCompletionsResponseFormat? responseFormat = null, + IEnumerable? functions = null) + : this( + openAIClient: openAIClient, + name: name, + options: CreateChatCompletionOptions(modelName, temperature, maxTokens, seed, responseFormat, functions), + systemMessage: systemMessage) + { + } + + /// + /// Create a new instance of . + /// + /// openai client + /// agent name + /// system message + /// chat completion option. 
The option can't contain messages + public OpenAIChatAgent( + OpenAIClient openAIClient, + string name, + ChatCompletionsOptions options, + string systemMessage = "You are a helpful AI assistant") + { + if (options.Messages is { Count: > 0 }) + { + throw new ArgumentException("Messages should not be provided in options"); + } + + this.openAIClient = openAIClient; + this.Name = name; + this.options = options; + this.systemMessage = systemMessage; + } + + public string Name { get; } + + public async Task GenerateReplyAsync( + IEnumerable messages, + GenerateReplyOptions? options = null, + CancellationToken cancellationToken = default) + { + var settings = this.CreateChatCompletionsOptions(options, messages); + var reply = await this.openAIClient.GetChatCompletionsAsync(settings, cancellationToken); + + return new MessageEnvelope(reply, from: this.Name); + } + + public async IAsyncEnumerable GenerateStreamingReplyAsync( + IEnumerable messages, + GenerateReplyOptions? options = null, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var settings = this.CreateChatCompletionsOptions(options, messages); + var response = await this.openAIClient.GetChatCompletionsStreamingAsync(settings, cancellationToken); + await foreach (var update in response.WithCancellation(cancellationToken)) + { + if (update.ChoiceIndex > 0) + { + throw new InvalidOperationException("Only one choice is supported in streaming response"); + } + + yield return new MessageEnvelope(update, from: this.Name); + } + } + + private ChatCompletionsOptions CreateChatCompletionsOptions(GenerateReplyOptions? options, IEnumerable messages) + { + var oaiMessages = messages.Select(m => m switch + { + IMessage chatRequestMessage => chatRequestMessage.Content, + _ => throw new ArgumentException("Invalid message type") + }); + + // add system message if there's no system message in messages + if (!oaiMessages.Any(m => m is ChatRequestSystemMessage)) + { + oaiMessages = new[] { new ChatRequestSystemMessage(systemMessage) }.Concat(oaiMessages); + } + + // clone the options by serializing and deserializing + var json = JsonSerializer.Serialize(this.options); + var settings = JsonSerializer.Deserialize(json) ?? throw new InvalidOperationException("Failed to clone options"); + + foreach (var m in oaiMessages) + { + settings.Messages.Add(m); + } + + settings.Temperature = options?.Temperature ?? settings.Temperature; + settings.MaxTokens = options?.MaxToken ?? settings.MaxTokens; + + foreach (var functions in this.options.Tools) + { + settings.Tools.Add(functions); + } + + foreach (var stopSequence in this.options.StopSequences) + { + settings.StopSequences.Add(stopSequence); + } + + var openAIFunctionDefinitions = options?.Functions?.Select(f => f.ToOpenAIFunctionDefinition()).ToList(); + if (openAIFunctionDefinitions is { Count: > 0 }) + { + foreach (var f in openAIFunctionDefinitions) + { + settings.Tools.Add(new ChatCompletionsFunctionToolDefinition(f)); + } + } + + if (options?.StopSequence is var sequence && sequence is { Length: > 0 }) + { + foreach (var seq in sequence) + { + settings.StopSequences.Add(seq); + } + } + + return settings; + } + + private static ChatCompletionsOptions CreateChatCompletionOptions( + string modelName, + float temperature = 0.7f, + int maxTokens = 1024, + int? seed = null, + ChatCompletionsResponseFormat? responseFormat = null, + IEnumerable? 
functions = null) + { + var options = new ChatCompletionsOptions(modelName, []) + { + Temperature = temperature, + MaxTokens = maxTokens, + Seed = seed, + ResponseFormat = responseFormat, + }; + + if (functions is not null) + { + foreach (var f in functions) + { + options.Tools.Add(new ChatCompletionsFunctionToolDefinition(f)); + } + } + + return options; + } +} diff --git a/dotnet/src/AutoGen.OpenAI.V1/AutoGen.OpenAI.V1.csproj b/dotnet/src/AutoGen.OpenAI.V1/AutoGen.OpenAI.V1.csproj new file mode 100644 index 00000000000..21951cb32fb --- /dev/null +++ b/dotnet/src/AutoGen.OpenAI.V1/AutoGen.OpenAI.V1.csproj @@ -0,0 +1,27 @@ + + + $(PackageTargetFrameworks) + AutoGen.OpenAI + + + + + + + AutoGen.OpenAI.V1 + + OpenAI Intergration for AutoGen. + This package connects to openai using Azure.AI.OpenAI v1 package. It is reserved to keep compatibility with the projects which stick to that v1 package. + To use the latest version of OpenAI SDK, please use AutoGen.OpenAI package. + + + + + + + + + + + + diff --git a/dotnet/src/AutoGen.OpenAI/AzureOpenAIConfig.cs b/dotnet/src/AutoGen.OpenAI.V1/AzureOpenAIConfig.cs similarity index 95% rename from dotnet/src/AutoGen.OpenAI/AzureOpenAIConfig.cs rename to dotnet/src/AutoGen.OpenAI.V1/AzureOpenAIConfig.cs index 31df784ed21..2be8f21dc4f 100644 --- a/dotnet/src/AutoGen.OpenAI/AzureOpenAIConfig.cs +++ b/dotnet/src/AutoGen.OpenAI.V1/AzureOpenAIConfig.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // AzureOpenAIConfig.cs -namespace AutoGen.OpenAI; +namespace AutoGen.OpenAI.V1; public class AzureOpenAIConfig : ILLMConfig { diff --git a/dotnet/src/AutoGen.OpenAI.V1/Extension/FunctionContractExtension.cs b/dotnet/src/AutoGen.OpenAI.V1/Extension/FunctionContractExtension.cs new file mode 100644 index 00000000000..62009b927ef --- /dev/null +++ b/dotnet/src/AutoGen.OpenAI.V1/Extension/FunctionContractExtension.cs @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// FunctionContractExtension.cs + +using System; +using System.Collections.Generic; +using Azure.AI.OpenAI; +using Json.Schema; +using Json.Schema.Generation; + +namespace AutoGen.OpenAI.V1.Extension; + +public static class FunctionContractExtension +{ + /// + /// Convert a to a that can be used in gpt funciton call. + /// + /// function contract + /// + public static FunctionDefinition ToOpenAIFunctionDefinition(this FunctionContract functionContract) + { + var functionDefinition = new FunctionDefinition + { + Name = functionContract.Name, + Description = functionContract.Description, + }; + var requiredParameterNames = new List(); + var propertiesSchemas = new Dictionary(); + var propertySchemaBuilder = new JsonSchemaBuilder().Type(SchemaValueType.Object); + foreach (var param in functionContract.Parameters ?? []) + { + if (param.Name is null) + { + throw new InvalidOperationException("Parameter name cannot be null"); + } + + var schemaBuilder = new JsonSchemaBuilder().FromType(param.ParameterType ?? 
throw new ArgumentNullException(nameof(param.ParameterType))); + if (param.Description != null) + { + schemaBuilder = schemaBuilder.Description(param.Description); + } + + if (param.IsRequired) + { + requiredParameterNames.Add(param.Name); + } + + var schema = schemaBuilder.Build(); + propertiesSchemas[param.Name] = schema; + + } + propertySchemaBuilder = propertySchemaBuilder.Properties(propertiesSchemas); + propertySchemaBuilder = propertySchemaBuilder.Required(requiredParameterNames); + + var option = new System.Text.Json.JsonSerializerOptions() + { + PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase + }; + + functionDefinition.Parameters = BinaryData.FromObjectAsJson(propertySchemaBuilder.Build(), option); + + return functionDefinition; + } +} diff --git a/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs b/dotnet/src/AutoGen.OpenAI.V1/Extension/MessageExtension.cs similarity index 99% rename from dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs rename to dotnet/src/AutoGen.OpenAI.V1/Extension/MessageExtension.cs index ed795e5e8ed..3264dccf3a8 100644 --- a/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs +++ b/dotnet/src/AutoGen.OpenAI.V1/Extension/MessageExtension.cs @@ -6,7 +6,7 @@ using System.Linq; using Azure.AI.OpenAI; -namespace AutoGen.OpenAI; +namespace AutoGen.OpenAI.V1; public static class MessageExtension { diff --git a/dotnet/src/AutoGen.OpenAI.V1/Extension/OpenAIAgentExtension.cs b/dotnet/src/AutoGen.OpenAI.V1/Extension/OpenAIAgentExtension.cs new file mode 100644 index 00000000000..6c0df8e0e96 --- /dev/null +++ b/dotnet/src/AutoGen.OpenAI.V1/Extension/OpenAIAgentExtension.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// OpenAIAgentExtension.cs + +namespace AutoGen.OpenAI.V1.Extension; + +public static class OpenAIAgentExtension +{ + /// + /// Register an to the + /// + /// the connector to use. If null, a new instance of will be created. + public static MiddlewareStreamingAgent RegisterMessageConnector( + this OpenAIChatAgent agent, OpenAIChatRequestMessageConnector? connector = null) + { + if (connector == null) + { + connector = new OpenAIChatRequestMessageConnector(); + } + + return agent.RegisterStreamingMiddleware(connector); + } + + /// + /// Register an to the where T is + /// + /// the connector to use. If null, a new instance of will be created. + public static MiddlewareStreamingAgent RegisterMessageConnector( + this MiddlewareStreamingAgent agent, OpenAIChatRequestMessageConnector? connector = null) + { + if (connector == null) + { + connector = new OpenAIChatRequestMessageConnector(); + } + + return agent.RegisterStreamingMiddleware(connector); + } +} diff --git a/dotnet/src/AutoGen.OpenAI.V1/GlobalUsing.cs b/dotnet/src/AutoGen.OpenAI.V1/GlobalUsing.cs new file mode 100644 index 00000000000..d66bf001ed5 --- /dev/null +++ b/dotnet/src/AutoGen.OpenAI.V1/GlobalUsing.cs @@ -0,0 +1,4 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GlobalUsing.cs + +global using AutoGen.Core; diff --git a/dotnet/src/AutoGen.OpenAI.V1/Middleware/OpenAIChatRequestMessageConnector.cs b/dotnet/src/AutoGen.OpenAI.V1/Middleware/OpenAIChatRequestMessageConnector.cs new file mode 100644 index 00000000000..f1bea485c1c --- /dev/null +++ b/dotnet/src/AutoGen.OpenAI.V1/Middleware/OpenAIChatRequestMessageConnector.cs @@ -0,0 +1,387 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// OpenAIChatRequestMessageConnector.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Azure.AI.OpenAI; + +namespace AutoGen.OpenAI.V1; + +/// +/// This middleware converts the incoming to where T is before sending to agent. And converts the output to after receiving from agent. +/// Supported are +/// - +/// - +/// - +/// - +/// - +/// - where T is +/// - where TMessage1 is and TMessage2 is +/// +public class OpenAIChatRequestMessageConnector : IMiddleware, IStreamingMiddleware +{ + private bool strictMode = false; + + /// + /// Create a new instance of . + /// + /// If true, will throw an + /// When the message type is not supported. If false, it will ignore the unsupported message type. + public OpenAIChatRequestMessageConnector(bool strictMode = false) + { + this.strictMode = strictMode; + } + + public string? Name => nameof(OpenAIChatRequestMessageConnector); + + public async Task InvokeAsync(MiddlewareContext context, IAgent agent, CancellationToken cancellationToken = default) + { + var chatMessages = ProcessIncomingMessages(agent, context.Messages); + + var reply = await agent.GenerateReplyAsync(chatMessages, context.Options, cancellationToken); + + return PostProcessMessage(reply); + } + + public async IAsyncEnumerable InvokeAsync( + MiddlewareContext context, + IStreamingAgent agent, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var chatMessages = ProcessIncomingMessages(agent, context.Messages); + var streamingReply = agent.GenerateStreamingReplyAsync(chatMessages, context.Options, cancellationToken); + string? currentToolName = null; + await foreach (var reply in streamingReply) + { + if (reply is IMessage update) + { + if (update.Content.FunctionName is string functionName) + { + currentToolName = functionName; + } + else if (update.Content.ToolCallUpdate is StreamingFunctionToolCallUpdate toolCallUpdate && toolCallUpdate.Name is string toolCallName) + { + currentToolName = toolCallName; + } + var postProcessMessage = PostProcessStreamingMessage(update, currentToolName); + if (postProcessMessage != null) + { + yield return postProcessMessage; + } + } + else + { + if (this.strictMode) + { + throw new InvalidOperationException($"Invalid streaming message type {reply.GetType().Name}"); + } + else + { + yield return reply; + } + } + } + } + + public IMessage PostProcessMessage(IMessage message) + { + return message switch + { + IMessage m => PostProcessChatResponseMessage(m.Content, m.From), + IMessage m => PostProcessChatCompletions(m), + _ when strictMode is false => message, + _ => throw new InvalidOperationException($"Invalid return message type {message.GetType().Name}"), + }; + } + + public IMessage? PostProcessStreamingMessage(IMessage update, string? 
currentToolName) + { + if (update.Content.ContentUpdate is string contentUpdate) + { + // text message + return new TextMessageUpdate(Role.Assistant, contentUpdate, from: update.From); + } + else if (update.Content.FunctionName is string functionName) + { + return new ToolCallMessageUpdate(functionName, string.Empty, from: update.From); + } + else if (update.Content.FunctionArgumentsUpdate is string functionArgumentsUpdate && currentToolName is string) + { + return new ToolCallMessageUpdate(currentToolName, functionArgumentsUpdate, from: update.From); + } + else if (update.Content.ToolCallUpdate is StreamingFunctionToolCallUpdate tooCallUpdate && currentToolName is string) + { + return new ToolCallMessageUpdate(tooCallUpdate.Name ?? currentToolName, tooCallUpdate.ArgumentsUpdate, from: update.From); + } + else + { + return null; + } + } + + private IMessage PostProcessChatCompletions(IMessage message) + { + // throw exception if prompt filter results is not null + if (message.Content.Choices[0].FinishReason == CompletionsFinishReason.ContentFiltered) + { + throw new InvalidOperationException("The content is filtered because its potential risk. Please try another input."); + } + + return PostProcessChatResponseMessage(message.Content.Choices[0].Message, message.From); + } + + private IMessage PostProcessChatResponseMessage(ChatResponseMessage chatResponseMessage, string? from) + { + var textContent = chatResponseMessage.Content; + if (chatResponseMessage.FunctionCall is FunctionCall functionCall) + { + return new ToolCallMessage(functionCall.Name, functionCall.Arguments, from) + { + Content = textContent, + }; + } + + if (chatResponseMessage.ToolCalls.Where(tc => tc is ChatCompletionsFunctionToolCall).Any()) + { + var functionToolCalls = chatResponseMessage.ToolCalls + .Where(tc => tc is ChatCompletionsFunctionToolCall) + .Select(tc => (ChatCompletionsFunctionToolCall)tc); + + var toolCalls = functionToolCalls.Select(tc => new ToolCall(tc.Name, tc.Arguments) { ToolCallId = tc.Id }); + + return new ToolCallMessage(toolCalls, from) + { + Content = textContent, + }; + } + + if (textContent is string content && !string.IsNullOrEmpty(content)) + { + return new TextMessage(Role.Assistant, content, from); + } + + throw new InvalidOperationException("Invalid ChatResponseMessage"); + } + + public IEnumerable ProcessIncomingMessages(IAgent agent, IEnumerable messages) + { + return messages.SelectMany(m => + { + if (m is IMessage crm) + { + return [crm]; + } + else + { + var chatRequestMessages = m switch + { + TextMessage textMessage => ProcessTextMessage(agent, textMessage), + ImageMessage imageMessage when (imageMessage.From is null || imageMessage.From != agent.Name) => ProcessImageMessage(agent, imageMessage), + MultiModalMessage multiModalMessage when (multiModalMessage.From is null || multiModalMessage.From != agent.Name) => ProcessMultiModalMessage(agent, multiModalMessage), + ToolCallMessage toolCallMessage when (toolCallMessage.From is null || toolCallMessage.From == agent.Name) => ProcessToolCallMessage(agent, toolCallMessage), + ToolCallResultMessage toolCallResultMessage => ProcessToolCallResultMessage(toolCallResultMessage), + AggregateMessage aggregateMessage => ProcessFunctionCallMiddlewareMessage(agent, aggregateMessage), +#pragma warning disable CS0618 // deprecated + Message msg => ProcessMessage(agent, msg), +#pragma warning restore CS0618 // deprecated + _ when strictMode is false => [], + _ => throw new InvalidOperationException($"Invalid message type: {m.GetType().Name}"), + }; 
+ + if (chatRequestMessages.Any()) + { + return chatRequestMessages.Select(cm => MessageEnvelope.Create(cm, m.From)); + } + else + { + return [m]; + } + } + }); + } + + [Obsolete("This method is deprecated, please use ProcessIncomingMessages(IAgent agent, IEnumerable messages) instead.")] + private IEnumerable ProcessIncomingMessagesForSelf(Message message) + { + if (message.Role == Role.System) + { + return new[] { new ChatRequestSystemMessage(message.Content) }; + } + else if (message.Content is string content && content is { Length: > 0 }) + { + if (message.FunctionName is null) + { + return new[] { new ChatRequestAssistantMessage(message.Content) }; + } + else + { + return new[] { new ChatRequestToolMessage(content, message.FunctionName) }; + } + } + else if (message.FunctionName is string functionName) + { + var msg = new ChatRequestAssistantMessage(content: null) + { + FunctionCall = new FunctionCall(functionName, message.FunctionArguments) + }; + + return new[] + { + msg, + }; + } + else + { + throw new InvalidOperationException("Invalid Message as message from self."); + } + } + + [Obsolete("This method is deprecated, please use ProcessIncomingMessages(IAgent agent, IEnumerable messages) instead.")] + private IEnumerable ProcessIncomingMessagesForOther(Message message) + { + if (message.Role == Role.System) + { + return [new ChatRequestSystemMessage(message.Content) { Name = message.From }]; + } + else if (message.Content is string content && content is { Length: > 0 }) + { + if (message.FunctionName is not null) + { + return new[] { new ChatRequestToolMessage(content, message.FunctionName) }; + } + + return [new ChatRequestUserMessage(message.Content) { Name = message.From }]; + } + else if (message.FunctionName is string _) + { + return [new ChatRequestUserMessage("// Message type is not supported") { Name = message.From }]; + } + else + { + throw new InvalidOperationException("Invalid Message as message from other."); + } + } + + private IEnumerable ProcessTextMessage(IAgent agent, TextMessage message) + { + if (message.Role == Role.System) + { + return [new ChatRequestSystemMessage(message.Content) { Name = message.From }]; + } + + if (agent.Name == message.From) + { + return [new ChatRequestAssistantMessage(message.Content) { Name = agent.Name }]; + } + else + { + return message.From switch + { + null when message.Role == Role.User => [new ChatRequestUserMessage(message.Content)], + null when message.Role == Role.Assistant => [new ChatRequestAssistantMessage(message.Content)], + null => throw new InvalidOperationException("Invalid Role"), + _ => [new ChatRequestUserMessage(message.Content) { Name = message.From }] + }; + } + } + + private IEnumerable ProcessImageMessage(IAgent agent, ImageMessage message) + { + if (agent.Name == message.From) + { + // image message from assistant is not supported + throw new ArgumentException("ImageMessage is not supported when message.From is the same with agent"); + } + + var imageContentItem = this.CreateChatMessageImageContentItemFromImageMessage(message); + return [new ChatRequestUserMessage([imageContentItem]) { Name = message.From }]; + } + + private IEnumerable ProcessMultiModalMessage(IAgent agent, MultiModalMessage message) + { + if (agent.Name == message.From) + { + // image message from assistant is not supported + throw new ArgumentException("MultiModalMessage is not supported when message.From is the same with agent"); + } + + IEnumerable items = message.Content.Select(ci => ci switch + { + TextMessage text => new 
ChatMessageTextContentItem(text.Content), + ImageMessage image => this.CreateChatMessageImageContentItemFromImageMessage(image), + _ => throw new NotImplementedException(), + }); + + return [new ChatRequestUserMessage(items) { Name = message.From }]; + } + + private ChatMessageImageContentItem CreateChatMessageImageContentItemFromImageMessage(ImageMessage message) + { + return message.Data is null && message.Url is not null + ? new ChatMessageImageContentItem(new Uri(message.Url)) + : new ChatMessageImageContentItem(message.Data, message.Data?.MediaType); + } + + private IEnumerable ProcessToolCallMessage(IAgent agent, ToolCallMessage message) + { + if (message.From is not null && message.From != agent.Name) + { + throw new ArgumentException("ToolCallMessage is not supported when message.From is not the same with agent"); + } + + var toolCall = message.ToolCalls.Select((tc, i) => new ChatCompletionsFunctionToolCall(tc.ToolCallId ?? $"{tc.FunctionName}_{i}", tc.FunctionName, tc.FunctionArguments)); + var textContent = message.GetContent() ?? string.Empty; + var chatRequestMessage = new ChatRequestAssistantMessage(textContent) { Name = message.From }; + foreach (var tc in toolCall) + { + chatRequestMessage.ToolCalls.Add(tc); + } + + return [chatRequestMessage]; + } + + private IEnumerable ProcessToolCallResultMessage(ToolCallResultMessage message) + { + return message.ToolCalls + .Where(tc => tc.Result is not null) + .Select((tc, i) => new ChatRequestToolMessage(tc.Result, tc.ToolCallId ?? $"{tc.FunctionName}_{i}")); + } + + [Obsolete("This method is deprecated, please use ProcessIncomingMessages(IAgent agent, IEnumerable messages) instead.")] + private IEnumerable ProcessMessage(IAgent agent, Message message) + { + if (message.From is not null && message.From != agent.Name) + { + return ProcessIncomingMessagesForOther(message); + } + else + { + return ProcessIncomingMessagesForSelf(message); + } + } + + private IEnumerable ProcessFunctionCallMiddlewareMessage(IAgent agent, AggregateMessage aggregateMessage) + { + if (aggregateMessage.From is not null && aggregateMessage.From != agent.Name) + { + // convert as user message + var resultMessage = aggregateMessage.Message2; + + return resultMessage.ToolCalls.Select(tc => new ChatRequestUserMessage(tc.Result) { Name = aggregateMessage.From }); + } + else + { + var toolCallMessage1 = aggregateMessage.Message1; + var toolCallResultMessage = aggregateMessage.Message2; + + var assistantMessage = this.ProcessToolCallMessage(agent, toolCallMessage1); + var toolCallResults = this.ProcessToolCallResultMessage(toolCallResultMessage); + + return assistantMessage.Concat(toolCallResults); + } + } +} diff --git a/dotnet/src/AutoGen.OpenAI/OpenAIConfig.cs b/dotnet/src/AutoGen.OpenAI.V1/OpenAIConfig.cs similarity index 91% rename from dotnet/src/AutoGen.OpenAI/OpenAIConfig.cs rename to dotnet/src/AutoGen.OpenAI.V1/OpenAIConfig.cs index 35ce1e491aa..592647cc2c1 100644 --- a/dotnet/src/AutoGen.OpenAI/OpenAIConfig.cs +++ b/dotnet/src/AutoGen.OpenAI.V1/OpenAIConfig.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// OpenAIConfig.cs -namespace AutoGen.OpenAI; +namespace AutoGen.OpenAI.V1; public class OpenAIConfig : ILLMConfig { diff --git a/dotnet/src/AutoGen.OpenAI/Agent/OpenAIChatAgent.cs b/dotnet/src/AutoGen.OpenAI/Agent/OpenAIChatAgent.cs index c957801f023..1ae1e45db15 100644 --- a/dotnet/src/AutoGen.OpenAI/Agent/OpenAIChatAgent.cs +++ b/dotnet/src/AutoGen.OpenAI/Agent/OpenAIChatAgent.cs @@ -5,63 +5,60 @@ using System.Collections.Generic; using System.Linq; using System.Runtime.CompilerServices; -using System.Text.Json; using System.Threading; using System.Threading.Tasks; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; +using global::OpenAI; +using global::OpenAI.Chat; namespace AutoGen.OpenAI; /// /// OpenAI client agent. This agent is a thin wrapper around to provide a simple interface for chat completions. -/// To better work with other agents, it's recommended to use which supports more message types and have a better compatibility with other agents. /// supports the following message types: /// /// -/// where T is : chat request message. +/// where T is : chat message. /// /// /// returns the following message types: /// /// -/// where T is : chat response message. -/// where T is : streaming chat completions update. +/// where T is : chat response message. +/// where T is : streaming chat completions update. /// /// /// public class OpenAIChatAgent : IStreamingAgent { - private readonly OpenAIClient openAIClient; - private readonly ChatCompletionsOptions options; + private readonly ChatClient chatClient; + private readonly ChatCompletionOptions options; private readonly string systemMessage; /// /// Create a new instance of . /// - /// openai client + /// openai client /// agent name - /// model name. e.g. gpt-turbo-3.5 /// system message /// temperature /// max tokens to generated - /// response format, set it to to enable json mode. + /// response format, set it to to enable json mode. /// seed to use, set it to enable deterministic output /// functions public OpenAIChatAgent( - OpenAIClient openAIClient, + ChatClient chatClient, string name, - string modelName, string systemMessage = "You are a helpful AI assistant", float temperature = 0.7f, int maxTokens = 1024, int? seed = null, - ChatCompletionsResponseFormat? responseFormat = null, - IEnumerable? functions = null) + ChatResponseFormat? responseFormat = null, + IEnumerable? functions = null) : this( - openAIClient: openAIClient, + chatClient: chatClient, name: name, - options: CreateChatCompletionOptions(modelName, temperature, maxTokens, seed, responseFormat, functions), + options: CreateChatCompletionOptions(temperature, maxTokens, seed, responseFormat, functions), systemMessage: systemMessage) { } @@ -69,22 +66,17 @@ public OpenAIChatAgent( /// /// Create a new instance of . /// - /// openai client + /// openai chat client /// agent name /// system message /// chat completion option. The option can't contain messages public OpenAIChatAgent( - OpenAIClient openAIClient, + ChatClient chatClient, string name, - ChatCompletionsOptions options, + ChatCompletionOptions options, string systemMessage = "You are a helpful AI assistant") { - if (options.Messages is { Count: > 0 }) - { - throw new ArgumentException("Messages should not be provided in options"); - } - - this.openAIClient = openAIClient; + this.chatClient = chatClient; this.Name = name; this.options = options; this.systemMessage = systemMessage; @@ -97,10 +89,10 @@ public async Task GenerateReplyAsync( GenerateReplyOptions? 
options = null, CancellationToken cancellationToken = default) { - var settings = this.CreateChatCompletionsOptions(options, messages); - var reply = await this.openAIClient.GetChatCompletionsAsync(settings, cancellationToken); - - return new MessageEnvelope(reply, from: this.Name); + var chatHistory = this.CreateChatMessages(messages); + var settings = this.CreateChatCompletionsOptions(options); + var reply = await this.chatClient.CompleteChatAsync(chatHistory, settings, cancellationToken); + return new MessageEnvelope(reply.Value, from: this.Name); } public async IAsyncEnumerable GenerateStreamingReplyAsync( @@ -108,61 +100,74 @@ public async IAsyncEnumerable GenerateStreamingReplyAsync( GenerateReplyOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = this.CreateChatCompletionsOptions(options, messages); - var response = await this.openAIClient.GetChatCompletionsStreamingAsync(settings, cancellationToken); + var chatHistory = this.CreateChatMessages(messages); + var settings = this.CreateChatCompletionsOptions(options); + var response = this.chatClient.CompleteChatStreamingAsync(chatHistory, settings, cancellationToken); await foreach (var update in response.WithCancellation(cancellationToken)) { - if (update.ChoiceIndex > 0) + if (update.ContentUpdate.Count > 1) { throw new InvalidOperationException("Only one choice is supported in streaming response"); } - yield return new MessageEnvelope(update, from: this.Name); + yield return new MessageEnvelope(update, from: this.Name); } } - private ChatCompletionsOptions CreateChatCompletionsOptions(GenerateReplyOptions? options, IEnumerable messages) + private IEnumerable CreateChatMessages(IEnumerable messages) { var oaiMessages = messages.Select(m => m switch { - IMessage chatRequestMessage => chatRequestMessage.Content, + IMessage chatMessage => chatMessage.Content, _ => throw new ArgumentException("Invalid message type") }); // add system message if there's no system message in messages - if (!oaiMessages.Any(m => m is ChatRequestSystemMessage)) + if (!oaiMessages.Any(m => m is SystemChatMessage)) { - oaiMessages = new[] { new ChatRequestSystemMessage(systemMessage) }.Concat(oaiMessages); + oaiMessages = new[] { new SystemChatMessage(systemMessage) }.Concat(oaiMessages); } - // clone the options by serializing and deserializing - var json = JsonSerializer.Serialize(this.options); - var settings = JsonSerializer.Deserialize(json) ?? throw new InvalidOperationException("Failed to clone options"); + return oaiMessages; + } - foreach (var m in oaiMessages) + private ChatCompletionOptions CreateChatCompletionsOptions(GenerateReplyOptions? options) + { + var option = new ChatCompletionOptions() { - settings.Messages.Add(m); - } - - settings.Temperature = options?.Temperature ?? settings.Temperature; - settings.MaxTokens = options?.MaxToken ?? settings.MaxTokens; + Seed = this.options.Seed, + Temperature = options?.Temperature ?? this.options.Temperature, + MaxTokens = options?.MaxToken ?? 
this.options.MaxTokens, + ResponseFormat = this.options.ResponseFormat, + FrequencyPenalty = this.options.FrequencyPenalty, + FunctionChoice = this.options.FunctionChoice, + IncludeLogProbabilities = this.options.IncludeLogProbabilities, + ParallelToolCallsEnabled = this.options.ParallelToolCallsEnabled, + PresencePenalty = this.options.PresencePenalty, + ToolChoice = this.options.ToolChoice, + TopLogProbabilityCount = this.options.TopLogProbabilityCount, + TopP = this.options.TopP, + EndUserId = this.options.EndUserId, + }; - foreach (var functions in this.options.Tools) + // add tools from this.options to option + foreach (var tool in this.options.Tools) { - settings.Tools.Add(functions); + option.Tools.Add(tool); } - foreach (var stopSequence in this.options.StopSequences) + // add stop sequences from this.options to option + foreach (var seq in this.options.StopSequences) { - settings.StopSequences.Add(stopSequence); + option.StopSequences.Add(seq); } - var openAIFunctionDefinitions = options?.Functions?.Select(f => f.ToOpenAIFunctionDefinition()).ToList(); + var openAIFunctionDefinitions = options?.Functions?.Select(f => f.ToChatTool()).ToList(); if (openAIFunctionDefinitions is { Count: > 0 }) { foreach (var f in openAIFunctionDefinitions) { - settings.Tools.Add(new ChatCompletionsFunctionToolDefinition(f)); + option.Tools.Add(f); } } @@ -170,22 +175,21 @@ private ChatCompletionsOptions CreateChatCompletionsOptions(GenerateReplyOptions { foreach (var seq in sequence) { - settings.StopSequences.Add(seq); + option.StopSequences.Add(seq); } } - return settings; + return option; } - private static ChatCompletionsOptions CreateChatCompletionOptions( - string modelName, + private static ChatCompletionOptions CreateChatCompletionOptions( float temperature = 0.7f, int maxTokens = 1024, int? seed = null, - ChatCompletionsResponseFormat? responseFormat = null, - IEnumerable? functions = null) + ChatResponseFormat? responseFormat = null, + IEnumerable? functions = null) { - var options = new ChatCompletionsOptions(modelName, []) + var options = new ChatCompletionOptions { Temperature = temperature, MaxTokens = maxTokens, @@ -197,7 +201,7 @@ private static ChatCompletionsOptions CreateChatCompletionOptions( { foreach (var f in functions) { - options.Tools.Add(new ChatCompletionsFunctionToolDefinition(f)); + options.Tools.Add(f); } } diff --git a/dotnet/src/AutoGen.OpenAI/AutoGen.OpenAI.csproj b/dotnet/src/AutoGen.OpenAI/AutoGen.OpenAI.csproj index e3a2f41c8f7..f93fdd4bc5e 100644 --- a/dotnet/src/AutoGen.OpenAI/AutoGen.OpenAI.csproj +++ b/dotnet/src/AutoGen.OpenAI/AutoGen.OpenAI.csproj @@ -11,11 +11,12 @@ AutoGen.OpenAI OpenAI Intergration for AutoGen. + If your project still depends on Azure.AI.OpenAI v1, please use AutoGen.OpenAI.V1 package instead. - + diff --git a/dotnet/src/AutoGen.OpenAI/Extension/FunctionContractExtension.cs b/dotnet/src/AutoGen.OpenAI/Extension/FunctionContractExtension.cs index 4accdc4d8d4..dd1c1125aec 100644 --- a/dotnet/src/AutoGen.OpenAI/Extension/FunctionContractExtension.cs +++ b/dotnet/src/AutoGen.OpenAI/Extension/FunctionContractExtension.cs @@ -3,26 +3,21 @@ using System; using System.Collections.Generic; -using Azure.AI.OpenAI; using Json.Schema; using Json.Schema.Generation; +using OpenAI.Chat; namespace AutoGen.OpenAI.Extension; public static class FunctionContractExtension { /// - /// Convert a to a that can be used in gpt funciton call. + /// Convert a to a that can be used in gpt funciton call. 
/// /// function contract - /// - public static FunctionDefinition ToOpenAIFunctionDefinition(this FunctionContract functionContract) + /// + public static ChatTool ToChatTool(this FunctionContract functionContract) { - var functionDefinition = new FunctionDefinition - { - Name = functionContract.Name, - Description = functionContract.Description, - }; var requiredParameterNames = new List(); var propertiesSchemas = new Dictionary(); var propertySchemaBuilder = new JsonSchemaBuilder().Type(SchemaValueType.Object); @@ -56,8 +51,22 @@ public static FunctionDefinition ToOpenAIFunctionDefinition(this FunctionContrac PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase }; - functionDefinition.Parameters = BinaryData.FromObjectAsJson(propertySchemaBuilder.Build(), option); + var functionDefinition = ChatTool.CreateFunctionTool( + functionContract.Name ?? throw new ArgumentNullException(nameof(functionContract.Name)), + functionContract.Description, + BinaryData.FromObjectAsJson(propertySchemaBuilder.Build(), option)); return functionDefinition; } + + /// + /// Convert a to a that can be used in gpt funciton call. + /// + /// function contract + /// + [Obsolete("Use ToChatTool instead")] + public static ChatTool ToOpenAIFunctionDefinition(this FunctionContract functionContract) + { + return functionContract.ToChatTool(); + } } diff --git a/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs b/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs index e1dd0757fcf..2297d123bf8 100644 --- a/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs +++ b/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs @@ -7,19 +7,19 @@ using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; -using Azure.AI.OpenAI; +using OpenAI.Chat; namespace AutoGen.OpenAI; /// -/// This middleware converts the incoming to where T is before sending to agent. And converts the output to after receiving from agent. +/// This middleware converts the incoming to where T is before sending to agent. And converts the output to after receiving from agent. /// Supported are /// - /// - /// - /// - /// - -/// - where T is +/// - where T is /// - where TMessage1 is and TMessage2 is /// public class OpenAIChatRequestMessageConnector : IMiddleware, IStreamingMiddleware @@ -54,24 +54,19 @@ public async IAsyncEnumerable InvokeAsync( { var chatMessages = ProcessIncomingMessages(agent, context.Messages); var streamingReply = agent.GenerateStreamingReplyAsync(chatMessages, context.Options, cancellationToken); - string? 
currentToolName = null; + var chunks = new List(); + + // only streaming the text content await foreach (var reply in streamingReply) { - if (reply is IMessage update) + if (reply is IMessage update) { - if (update.Content.FunctionName is string functionName) - { - currentToolName = functionName; - } - else if (update.Content.ToolCallUpdate is StreamingFunctionToolCallUpdate toolCallUpdate && toolCallUpdate.Name is string toolCallName) + if (update.Content.ContentUpdate.Count == 1 && update.Content.ContentUpdate[0].Kind == ChatMessageContentPartKind.Text) { - currentToolName = toolCallName; - } - var postProcessMessage = PostProcessStreamingMessage(update, currentToolName); - if (postProcessMessage != null) - { - yield return postProcessMessage; + yield return new TextMessageUpdate(Role.Assistant, update.Content.ContentUpdate[0].Text, from: update.From); } + + chunks.Add(update.Content); } else { @@ -85,83 +80,140 @@ public async IAsyncEnumerable InvokeAsync( } } } + + // process the tool call + var streamingChatToolCallUpdates = chunks.Where(c => c.ToolCallUpdates.Count > 0) + .SelectMany(c => c.ToolCallUpdates) + .ToList(); + + // collect all text parts + var textParts = chunks.SelectMany(c => c.ContentUpdate) + .Where(c => c.Kind == ChatMessageContentPartKind.Text) + .Select(c => c.Text) + .ToList(); + + // combine the tool call and function call into one ToolCallMessages + var text = string.Join(string.Empty, textParts); + var toolCalls = new List(); + var currentToolName = string.Empty; + var currentToolArguments = string.Empty; + var currentToolId = string.Empty; + int? currentIndex = null; + foreach (var toolCall in streamingChatToolCallUpdates) + { + if (currentIndex is null) + { + currentIndex = toolCall.Index; + } + + if (toolCall.Index == currentIndex) + { + currentToolName += toolCall.FunctionName; + currentToolArguments += toolCall.FunctionArgumentsUpdate; + currentToolId += toolCall.Id; + + yield return new ToolCallMessageUpdate(currentToolName, currentToolArguments, from: agent.Name); + } + else + { + toolCalls.Add(new ToolCall(currentToolName, currentToolArguments) { ToolCallId = currentToolId }); + currentToolName = toolCall.FunctionName; + currentToolArguments = toolCall.FunctionArgumentsUpdate; + currentToolId = toolCall.Id; + currentIndex = toolCall.Index; + + yield return new ToolCallMessageUpdate(currentToolName, currentToolArguments, from: agent.Name); + } + } + + if (string.IsNullOrEmpty(currentToolName) is false) + { + toolCalls.Add(new ToolCall(currentToolName, currentToolArguments) { ToolCallId = currentToolId }); + } + + if (toolCalls.Any()) + { + yield return new ToolCallMessage(toolCalls, from: agent.Name) + { + Content = text, + }; + } } public IMessage PostProcessMessage(IMessage message) { return message switch { - IMessage m => PostProcessChatResponseMessage(m.Content, m.From), - IMessage m => PostProcessChatCompletions(m), + IMessage m => PostProcessChatCompletions(m), _ when strictMode is false => message, _ => throw new InvalidOperationException($"Invalid return message type {message.GetType().Name}"), }; } - public IMessage? PostProcessStreamingMessage(IMessage update, string? 
currentToolName) + private IMessage PostProcessChatCompletions(IMessage message) { - if (update.Content.ContentUpdate is string contentUpdate) - { - // text message - return new TextMessageUpdate(Role.Assistant, contentUpdate, from: update.From); - } - else if (update.Content.FunctionName is string functionName) - { - return new ToolCallMessageUpdate(functionName, string.Empty, from: update.From); - } - else if (update.Content.FunctionArgumentsUpdate is string functionArgumentsUpdate && currentToolName is string) - { - return new ToolCallMessageUpdate(currentToolName, functionArgumentsUpdate, from: update.From); - } - else if (update.Content.ToolCallUpdate is StreamingFunctionToolCallUpdate tooCallUpdate && currentToolName is string) + // throw exception if prompt filter results is not null + if (message.Content.FinishReason == ChatFinishReason.ContentFilter) { - return new ToolCallMessageUpdate(tooCallUpdate.Name ?? currentToolName, tooCallUpdate.ArgumentsUpdate, from: update.From); + throw new InvalidOperationException("The content is filtered because its potential risk. Please try another input."); } - else + + // throw exception is there is more than on choice + if (message.Content.Content.Count > 1) { - return null; + throw new InvalidOperationException("The content has more than one choice. Please try another input."); } + + return PostProcessChatResponseMessage(message.Content, message.From); } - private IMessage PostProcessChatCompletions(IMessage message) + private IMessage PostProcessChatResponseMessage(ChatCompletion chatCompletion, string? from) { // throw exception if prompt filter results is not null - if (message.Content.Choices[0].FinishReason == CompletionsFinishReason.ContentFiltered) + if (chatCompletion.FinishReason == ChatFinishReason.ContentFilter) { throw new InvalidOperationException("The content is filtered because its potential risk. Please try another input."); } - return PostProcessChatResponseMessage(message.Content.Choices[0].Message, message.From); - } + // throw exception is there is more than on choice + if (chatCompletion.Content.Count > 1) + { + throw new InvalidOperationException("The content has more than one choice. Please try another input."); + } + var textContent = chatCompletion.Content.FirstOrDefault(); - private IMessage PostProcessChatResponseMessage(ChatResponseMessage chatResponseMessage, string? from) - { - var textContent = chatResponseMessage.Content; - if (chatResponseMessage.FunctionCall is FunctionCall functionCall) + // if tool calls is not empty, return ToolCallMessage + if (chatCompletion.ToolCalls is { Count: > 0 }) { - return new ToolCallMessage(functionCall.Name, functionCall.Arguments, from) + var toolCalls = chatCompletion.ToolCalls.Select(tc => new ToolCall(tc.FunctionName, tc.FunctionArguments) { ToolCallId = tc.Id }); + return new ToolCallMessage(toolCalls, from) { - Content = textContent, + Content = textContent?.Kind switch + { + _ when textContent?.Kind == ChatMessageContentPartKind.Text => textContent.Text, + _ => null, + }, }; } - if (chatResponseMessage.ToolCalls.Where(tc => tc is ChatCompletionsFunctionToolCall).Any()) + // else, process function call. + // This is deprecated and will be removed in the future. 
+ if (chatCompletion.FunctionCall is ChatFunctionCall fc) { - var functionToolCalls = chatResponseMessage.ToolCalls - .Where(tc => tc is ChatCompletionsFunctionToolCall) - .Select(tc => (ChatCompletionsFunctionToolCall)tc); - - var toolCalls = functionToolCalls.Select(tc => new ToolCall(tc.Name, tc.Arguments) { ToolCallId = tc.Id }); - - return new ToolCallMessage(toolCalls, from) + return new ToolCallMessage(fc.FunctionName, fc.FunctionArguments, from) { - Content = textContent, + Content = textContent?.Kind switch + { + _ when textContent?.Kind == ChatMessageContentPartKind.Text => textContent.Text, + _ => null, + }, }; } - if (textContent is string content && !string.IsNullOrEmpty(content)) + // if the content is text, return TextMessage + if (textContent?.Kind == ChatMessageContentPartKind.Text) { - return new TextMessage(Role.Assistant, content, from); + return new TextMessage(Role.Assistant, textContent.Text, from); } throw new InvalidOperationException("Invalid ChatResponseMessage"); @@ -171,7 +223,7 @@ public IEnumerable ProcessIncomingMessages(IAgent agent, IEnumerable(m => { - if (m is IMessage crm) + if (m is IMessage crm) { return [crm]; } @@ -185,9 +237,6 @@ MultiModalMessage multiModalMessage when (multiModalMessage.From is null || mult ToolCallMessage toolCallMessage when (toolCallMessage.From is null || toolCallMessage.From == agent.Name) => ProcessToolCallMessage(agent, toolCallMessage), ToolCallResultMessage toolCallResultMessage => ProcessToolCallResultMessage(toolCallResultMessage), AggregateMessage aggregateMessage => ProcessFunctionCallMiddlewareMessage(agent, aggregateMessage), -#pragma warning disable CS0618 // deprecated - Message msg => ProcessMessage(agent, msg), -#pragma warning restore CS0618 // deprecated _ when strictMode is false => [], _ => throw new InvalidOperationException($"Invalid message type: {m.GetType().Name}"), }; @@ -204,92 +253,30 @@ ToolCallMessage toolCallMessage when (toolCallMessage.From is null || toolCallMe }); } - [Obsolete("This method is deprecated, please use ProcessIncomingMessages(IAgent agent, IEnumerable messages) instead.")] - private IEnumerable ProcessIncomingMessagesForSelf(Message message) - { - if (message.Role == Role.System) - { - return new[] { new ChatRequestSystemMessage(message.Content) }; - } - else if (message.Content is string content && content is { Length: > 0 }) - { - if (message.FunctionName is null) - { - return new[] { new ChatRequestAssistantMessage(message.Content) }; - } - else - { - return new[] { new ChatRequestToolMessage(content, message.FunctionName) }; - } - } - else if (message.FunctionName is string functionName) - { - var msg = new ChatRequestAssistantMessage(content: null) - { - FunctionCall = new FunctionCall(functionName, message.FunctionArguments) - }; - - return new[] - { - msg, - }; - } - else - { - throw new InvalidOperationException("Invalid Message as message from self."); - } - } - - [Obsolete("This method is deprecated, please use ProcessIncomingMessages(IAgent agent, IEnumerable messages) instead.")] - private IEnumerable ProcessIncomingMessagesForOther(Message message) - { - if (message.Role == Role.System) - { - return [new ChatRequestSystemMessage(message.Content) { Name = message.From }]; - } - else if (message.Content is string content && content is { Length: > 0 }) - { - if (message.FunctionName is not null) - { - return new[] { new ChatRequestToolMessage(content, message.FunctionName) }; - } - - return [new ChatRequestUserMessage(message.Content) { Name = message.From }]; - } - 
else if (message.FunctionName is string _) - { - return [new ChatRequestUserMessage("// Message type is not supported") { Name = message.From }]; - } - else - { - throw new InvalidOperationException("Invalid Message as message from other."); - } - } - - private IEnumerable ProcessTextMessage(IAgent agent, TextMessage message) + private IEnumerable ProcessTextMessage(IAgent agent, TextMessage message) { if (message.Role == Role.System) { - return [new ChatRequestSystemMessage(message.Content) { Name = message.From }]; + return [new SystemChatMessage(message.Content) { ParticipantName = message.From }]; } if (agent.Name == message.From) { - return [new ChatRequestAssistantMessage(message.Content) { Name = agent.Name }]; + return [new AssistantChatMessage(message.Content) { ParticipantName = agent.Name }]; } else { return message.From switch { - null when message.Role == Role.User => [new ChatRequestUserMessage(message.Content)], - null when message.Role == Role.Assistant => [new ChatRequestAssistantMessage(message.Content)], + null when message.Role == Role.User => [new UserChatMessage(message.Content)], + null when message.Role == Role.Assistant => [new AssistantChatMessage(message.Content)], null => throw new InvalidOperationException("Invalid Role"), - _ => [new ChatRequestUserMessage(message.Content) { Name = message.From }] + _ => [new UserChatMessage(message.Content) { ParticipantName = message.From }] }; } } - private IEnumerable ProcessImageMessage(IAgent agent, ImageMessage message) + private IEnumerable ProcessImageMessage(IAgent agent, ImageMessage message) { if (agent.Name == message.From) { @@ -298,10 +285,10 @@ private IEnumerable ProcessImageMessage(IAgent agent, ImageM } var imageContentItem = this.CreateChatMessageImageContentItemFromImageMessage(message); - return [new ChatRequestUserMessage([imageContentItem]) { Name = message.From }]; + return [new UserChatMessage([imageContentItem]) { ParticipantName = message.From }]; } - private IEnumerable ProcessMultiModalMessage(IAgent agent, MultiModalMessage message) + private IEnumerable ProcessMultiModalMessage(IAgent agent, MultiModalMessage message) { if (agent.Name == message.From) { @@ -309,69 +296,53 @@ private IEnumerable ProcessMultiModalMessage(IAgent agent, M throw new ArgumentException("MultiModalMessage is not supported when message.From is the same with agent"); } - IEnumerable items = message.Content.Select(ci => ci switch + IEnumerable items = message.Content.Select(ci => ci switch { - TextMessage text => new ChatMessageTextContentItem(text.Content), + TextMessage text => ChatMessageContentPart.CreateTextMessageContentPart(text.Content), ImageMessage image => this.CreateChatMessageImageContentItemFromImageMessage(image), _ => throw new NotImplementedException(), }); - return [new ChatRequestUserMessage(items) { Name = message.From }]; + return [new UserChatMessage(items) { ParticipantName = message.From }]; } - private ChatMessageImageContentItem CreateChatMessageImageContentItemFromImageMessage(ImageMessage message) + private ChatMessageContentPart CreateChatMessageImageContentItemFromImageMessage(ImageMessage message) { return message.Data is null && message.Url is not null - ? new ChatMessageImageContentItem(new Uri(message.Url)) - : new ChatMessageImageContentItem(message.Data, message.Data?.MediaType); + ? 
ChatMessageContentPart.CreateImageMessageContentPart(new Uri(message.Url)) + : ChatMessageContentPart.CreateImageMessageContentPart(message.Data, message.Data?.MediaType); } - private IEnumerable ProcessToolCallMessage(IAgent agent, ToolCallMessage message) + private IEnumerable ProcessToolCallMessage(IAgent agent, ToolCallMessage message) { if (message.From is not null && message.From != agent.Name) { throw new ArgumentException("ToolCallMessage is not supported when message.From is not the same with agent"); } - var toolCall = message.ToolCalls.Select((tc, i) => new ChatCompletionsFunctionToolCall(tc.ToolCallId ?? $"{tc.FunctionName}_{i}", tc.FunctionName, tc.FunctionArguments)); - var textContent = message.GetContent() ?? string.Empty; - var chatRequestMessage = new ChatRequestAssistantMessage(textContent) { Name = message.From }; - foreach (var tc in toolCall) - { - chatRequestMessage.ToolCalls.Add(tc); - } + var toolCallParts = message.ToolCalls.Select((tc, i) => ChatToolCall.CreateFunctionToolCall(tc.ToolCallId ?? $"{tc.FunctionName}_{i}", tc.FunctionName, tc.FunctionArguments)); + var textContent = message.GetContent() ?? null; + var chatRequestMessage = new AssistantChatMessage(toolCallParts, textContent) { ParticipantName = message.From }; return [chatRequestMessage]; } - private IEnumerable ProcessToolCallResultMessage(ToolCallResultMessage message) + private IEnumerable ProcessToolCallResultMessage(ToolCallResultMessage message) { return message.ToolCalls .Where(tc => tc.Result is not null) - .Select((tc, i) => new ChatRequestToolMessage(tc.Result, tc.ToolCallId ?? $"{tc.FunctionName}_{i}")); + .Select((tc, i) => new ToolChatMessage(tc.ToolCallId ?? $"{tc.FunctionName}_{i}", tc.Result)); } - [Obsolete("This method is deprecated, please use ProcessIncomingMessages(IAgent agent, IEnumerable messages) instead.")] - private IEnumerable ProcessMessage(IAgent agent, Message message) - { - if (message.From is not null && message.From != agent.Name) - { - return ProcessIncomingMessagesForOther(message); - } - else - { - return ProcessIncomingMessagesForSelf(message); - } - } - private IEnumerable ProcessFunctionCallMiddlewareMessage(IAgent agent, AggregateMessage aggregateMessage) + private IEnumerable ProcessFunctionCallMiddlewareMessage(IAgent agent, AggregateMessage aggregateMessage) { if (aggregateMessage.From is not null && aggregateMessage.From != agent.Name) { // convert as user message var resultMessage = aggregateMessage.Message2; - return resultMessage.ToolCalls.Select(tc => new ChatRequestUserMessage(tc.Result) { Name = aggregateMessage.From }); + return resultMessage.ToolCalls.Select(tc => new UserChatMessage(tc.Result) { ParticipantName = aggregateMessage.From }); } else { diff --git a/dotnet/src/AutoGen.SemanticKernel/AutoGen.SemanticKernel.csproj b/dotnet/src/AutoGen.SemanticKernel/AutoGen.SemanticKernel.csproj index 1cc4d8e127a..b89626c01a0 100644 --- a/dotnet/src/AutoGen.SemanticKernel/AutoGen.SemanticKernel.csproj +++ b/dotnet/src/AutoGen.SemanticKernel/AutoGen.SemanticKernel.csproj @@ -17,9 +17,9 @@ - + diff --git a/dotnet/src/AutoGen.SemanticKernel/SemanticKernelAgent.cs b/dotnet/src/AutoGen.SemanticKernel/SemanticKernelAgent.cs index d12c54c1b3b..e10f5b043f2 100644 --- a/dotnet/src/AutoGen.SemanticKernel/SemanticKernelAgent.cs +++ b/dotnet/src/AutoGen.SemanticKernel/SemanticKernelAgent.cs @@ -106,7 +106,6 @@ private PromptExecutionSettings BuildOption(GenerateReplyOptions? options) MaxTokens = options?.MaxToken ?? 
1024, StopSequences = options?.StopSequence, ToolCallBehavior = ToolCallBehavior.AutoInvokeKernelFunctions, - ResultsPerPrompt = 1, }; } diff --git a/dotnet/src/AutoGen.SemanticKernel/SemanticKernelChatCompletionAgent.cs b/dotnet/src/AutoGen.SemanticKernel/SemanticKernelChatCompletionAgent.cs index 82d83a9e855..1354996430b 100644 --- a/dotnet/src/AutoGen.SemanticKernel/SemanticKernelChatCompletionAgent.cs +++ b/dotnet/src/AutoGen.SemanticKernel/SemanticKernelChatCompletionAgent.cs @@ -27,7 +27,7 @@ public async Task GenerateReplyAsync(IEnumerable messages, G CancellationToken cancellationToken = default) { ChatMessageContent[] reply = await _chatCompletionAgent - .InvokeAsync(BuildChatHistory(messages), cancellationToken) + .InvokeAsync(BuildChatHistory(messages), cancellationToken: cancellationToken) .ToArrayAsync(cancellationToken: cancellationToken); return reply.Length > 1 diff --git a/dotnet/src/AutoGen.WebAPI/OpenAI/Service/OpenAIChatCompletionService.cs b/dotnet/src/AutoGen.WebAPI/OpenAI/Service/OpenAIChatCompletionService.cs index 27481da006a..80d49050ee4 100644 --- a/dotnet/src/AutoGen.WebAPI/OpenAI/Service/OpenAIChatCompletionService.cs +++ b/dotnet/src/AutoGen.WebAPI/OpenAI/Service/OpenAIChatCompletionService.cs @@ -7,7 +7,6 @@ using System.Threading.Tasks; using AutoGen.Core; using AutoGen.WebAPI.OpenAI.DTO; - namespace AutoGen.Server; internal class OpenAIChatCompletionService @@ -44,7 +43,7 @@ public async Task GetChatCompletionAsync(OpenAIChatComplet { Message = message, Index = 0, - FinishReason = "completed", + FinishReason = "stop", }; openAIChatCompletion.Choices = [choice]; diff --git a/dotnet/src/AutoGen/API/LLMConfigAPI.cs b/dotnet/src/AutoGen/API/LLMConfigAPI.cs index 5154f3dd5f5..28b5ad44312 100644 --- a/dotnet/src/AutoGen/API/LLMConfigAPI.cs +++ b/dotnet/src/AutoGen/API/LLMConfigAPI.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; using System.Linq; -using AutoGen.OpenAI; namespace AutoGen { diff --git a/dotnet/src/AutoGen/Agent/ConversableAgent.cs b/dotnet/src/AutoGen/Agent/ConversableAgent.cs index fe147050202..da61c812f46 100644 --- a/dotnet/src/AutoGen/Agent/ConversableAgent.cs +++ b/dotnet/src/AutoGen/Agent/ConversableAgent.cs @@ -6,9 +6,8 @@ using System.Linq; using System.Threading; using System.Threading.Tasks; -using AutoGen.LMStudio; using AutoGen.OpenAI; - +using AutoGen.OpenAI.Extension; namespace AutoGen; public enum HumanInputMode @@ -87,13 +86,21 @@ public ConversableAgent( { IAgent nextAgent = llmConfig switch { - AzureOpenAIConfig azureConfig => new GPTAgent(this.Name!, this.systemMessage, azureConfig, temperature: config.Temperature ?? 0), - OpenAIConfig openAIConfig => new GPTAgent(this.Name!, this.systemMessage, openAIConfig, temperature: config.Temperature ?? 0), - LMStudioConfig lmStudioConfig => new LMStudioAgent( - name: this.Name, - config: lmStudioConfig, - systemMessage: this.systemMessage, - temperature: config.Temperature ?? 
0), + AzureOpenAIConfig azureConfig => new OpenAIChatAgent( + chatClient: azureConfig.CreateChatClient(), + name: this.Name!, + systemMessage: this.systemMessage) + .RegisterMessageConnector(), + OpenAIConfig openAIConfig => new OpenAIChatAgent( + chatClient: openAIConfig.CreateChatClient(), + name: this.Name!, + systemMessage: this.systemMessage) + .RegisterMessageConnector(), + LMStudioConfig lmStudioConfig => new OpenAIChatAgent( + chatClient: lmStudioConfig.CreateChatClient(), + name: this.Name!, + systemMessage: this.systemMessage) + .RegisterMessageConnector(), _ => throw new ArgumentException($"Unsupported config type {llmConfig.GetType()}"), }; diff --git a/dotnet/src/AutoGen/AutoGen.csproj b/dotnet/src/AutoGen/AutoGen.csproj index 3cb5a23da14..fe4431a3573 100644 --- a/dotnet/src/AutoGen/AutoGen.csproj +++ b/dotnet/src/AutoGen/AutoGen.csproj @@ -15,7 +15,8 @@ - + + @@ -26,6 +27,7 @@ + diff --git a/dotnet/src/AutoGen/AzureOpenAIConfig.cs b/dotnet/src/AutoGen/AzureOpenAIConfig.cs new file mode 100644 index 00000000000..6112a3815d5 --- /dev/null +++ b/dotnet/src/AutoGen/AzureOpenAIConfig.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// AzureOpenAIConfig.cs + +using Azure.AI.OpenAI; +using OpenAI.Chat; + +namespace AutoGen; + +public class AzureOpenAIConfig : ILLMConfig +{ + public AzureOpenAIConfig(string endpoint, string deploymentName, string apiKey) + { + this.Endpoint = endpoint; + this.DeploymentName = deploymentName; + this.ApiKey = apiKey; + } + + public string Endpoint { get; } + + public string DeploymentName { get; } + + public string ApiKey { get; } + + internal ChatClient CreateChatClient() + { + var client = new AzureOpenAIClient(new System.Uri(this.Endpoint), this.ApiKey); + + return client.GetChatClient(DeploymentName); + } +} diff --git a/dotnet/src/AutoGen/LMStudioConfig.cs b/dotnet/src/AutoGen/LMStudioConfig.cs new file mode 100644 index 00000000000..5fd9edc7080 --- /dev/null +++ b/dotnet/src/AutoGen/LMStudioConfig.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// LMStudioConfig.cs +using System; +using OpenAI; +using OpenAI.Chat; + +namespace AutoGen; + +/// +/// Add support for consuming openai-like API from LM Studio +/// +public class LMStudioConfig : ILLMConfig +{ + public LMStudioConfig(string host, int port) + { + this.Host = host; + this.Port = port; + this.Uri = new Uri($"http://{host}:{port}"); + } + + public LMStudioConfig(Uri uri) + { + this.Uri = uri; + this.Host = uri.Host; + this.Port = uri.Port; + } + + public string Host { get; } + + public int Port { get; } + + public Uri Uri { get; } + + internal ChatClient CreateChatClient() + { + var client = new OpenAIClient("api-key", new OpenAIClientOptions + { + Endpoint = this.Uri, + }); + + // model name doesn't matter for LM Studio + + return client.GetChatClient("model-name"); + } +} diff --git a/dotnet/src/AutoGen/OpenAIConfig.cs b/dotnet/src/AutoGen/OpenAIConfig.cs new file mode 100644 index 00000000000..ea50fa085f1 --- /dev/null +++ b/dotnet/src/AutoGen/OpenAIConfig.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// OpenAIConfig.cs + +using OpenAI; +using OpenAI.Chat; + +namespace AutoGen; + +public class OpenAIConfig : ILLMConfig +{ + public OpenAIConfig(string apiKey, string modelId) + { + this.ApiKey = apiKey; + this.ModelId = modelId; + } + + public string ApiKey { get; } + + public string ModelId { get; } + + internal ChatClient CreateChatClient() + { + var client = new OpenAIClient(this.ApiKey); + + return client.GetChatClient(this.ModelId); + } +} diff --git a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs index 102e48b9b8a..0018f2decbc 100644 --- a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs +++ b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs @@ -47,7 +47,12 @@ public async Task AnthropicClientStreamingChatCompletionTestAsync() request.Model = AnthropicConstants.Claude3Haiku; request.Stream = true; request.MaxTokens = 500; - request.SystemMessage = "You are a helpful assistant that convert input to json object, use JSON format."; + request.SystemMessage = + [ + SystemMessage.CreateSystemMessage( + "You are a helpful assistant that convert input to json object, use JSON format.") + ]; + request.Messages = new List() { new("user", "name: John, age: 41, email: g123456@gmail.com") @@ -88,7 +93,11 @@ public async Task AnthropicClientImageChatCompletionTestAsync() request.Model = AnthropicConstants.Claude3Haiku; request.Stream = false; request.MaxTokens = 100; - request.SystemMessage = "You are a LLM that is suppose to describe the content of the image. Give me a description of the provided image."; + request.SystemMessage = + [ + SystemMessage.CreateSystemMessage( + "You are a LLM that is suppose to describe the content of the image. Give me a description of the provided image."), + ]; var base64Image = await AnthropicTestUtils.Base64FromImageAsync("square.png"); var messages = new List @@ -165,6 +174,60 @@ public async Task AnthropicClientTestToolChoiceAsync() Assert.True(toolUseContent.Input is JsonNode); } + [ApiKeyFact("ANTHROPIC_API_KEY")] + public async Task AnthropicClientChatCompletionCacheControlTestAsync() + { + var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey); + + var request = new ChatCompletionRequest(); + request.Model = AnthropicConstants.Claude35Sonnet; + request.Stream = false; + request.MaxTokens = 100; + + request.SystemMessage = + [ + SystemMessage.CreateSystemMessageWithCacheControl( + $"You are an LLM that is great at remembering stories {AnthropicTestUtils.LongStory}"), + ]; + + request.Messages = + [ + new ChatMessage("user", "What should i know about Bob?") + ]; + + var response = await anthropicClient.CreateChatCompletionsAsync(request, CancellationToken.None); + response.Usage.Should().NotBeNull(); + + // There's no way to clear the cache. Running the assert frequently may cause this to fail because the cache is already been created + // response.Usage!.CreationInputTokens.Should().BeGreaterThan(0); + // The cache reduces the input tokens. 
We expect the input tokens to be less the large system prompt and only the user message + response.Usage!.InputTokens.Should().BeLessThan(20); + + request.Messages = + [ + new ChatMessage("user", "Summarize the story of bob") + ]; + + response = await anthropicClient.CreateChatCompletionsAsync(request, CancellationToken.None); + response.Usage.Should().NotBeNull(); + response.Usage!.CacheReadInputTokens.Should().BeGreaterThan(0); + response.Usage!.InputTokens.Should().BeLessThan(20); + + // Should not use the cache + request.SystemMessage = + [ + SystemMessage.CreateSystemMessage("You are a helpful assistant.") + ]; + + request.Messages = + [ + new ChatMessage("user", "What are some text editors I could use to write C#?") + ]; + + response = await anthropicClient.CreateChatCompletionsAsync(request, CancellationToken.None); + response.Usage!.CacheReadInputTokens.Should().Be(0); + } + private sealed class Person { [JsonPropertyName("name")] diff --git a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs index a1faffec534..d80c5fbe570 100644 --- a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs +++ b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs @@ -63,4 +63,82 @@ public static Tool StockTool }; } } + + #region Long text for caching + // To test cache control, the context must be larger than 1024 tokens for Claude 3.5 Sonnet and Claude 3 Opus + // 2048 tokens for Claude 3.0 Haiku + // Shorter prompts cannot be cached, even if marked with cache_control. Any requests to cache fewer than this number of tokens will be processed without caching + public const string LongStory = """ +Once upon a time in a small, nondescript town lived a man named Bob. Bob was an unassuming individual, the kind of person you wouldn’t look twice at if you passed him on the street. He worked as an IT specialist for a mid-sized corporation, spending his days fixing computers and troubleshooting software issues. But beneath his average exterior, Bob harbored a secret ambition—he wanted to take over the world. + +Bob wasn’t always like this. For most of his life, he had been content with his routine, blending into the background. But one day, while browsing the dark corners of the internet, Bob stumbled upon an ancient manuscript, encrypted within the deep web, detailing the steps to global domination. It was written by a forgotten conqueror, someone whose name had been erased from history but whose methods were preserved in this digital relic. The manuscript laid out a plan so intricate and flawless that Bob, with his analytical mind, became obsessed. + +Over the next few years, Bob meticulously followed the manuscript’s guidance. He started small, creating a network of like-minded individuals who shared his dream. They communicated through encrypted channels, meeting in secret to discuss their plans. Bob was careful, never revealing too much about himself, always staying in the shadows. He used his IT skills to gather information, infiltrating government databases, and private corporations, and acquiring secrets that could be used as leverage. + +As his network grew, so did his influence. Bob began to manipulate world events from behind the scenes. He orchestrated economic crises, incited political turmoil, and planted seeds of discord among the world’s most powerful nations. Each move was calculated, each action a step closer to his ultimate goal. 
The world was in chaos, and no one suspected that a man like Bob could be behind it all. + +But Bob knew that causing chaos wasn’t enough. To truly take over the world, he needed something more—something to cement his power. That’s when he turned to technology. Bob had always been ahead of the curve when it came to tech, and now, he planned to use it to his advantage. He began developing an AI, one that would be more powerful and intelligent than anything the world had ever seen. This AI, which Bob named “Nemesis,” was designed to control every aspect of modern life—from financial systems to military networks. + +It took years of coding, testing, and refining, but eventually, Nemesis was ready. Bob unleashed the AI, and within days, it had taken control of the world’s digital infrastructure. Governments were powerless, their systems compromised. Corporations crumbled as their assets were seized. The military couldn’t act, their weapons turned against them. Bob, from the comfort of his modest home, had done it. He had taken over the world. + +The world, now under Bob’s control, was eerily quiet. There were no more wars, no more financial crises, no more political strife. Nemesis ensured that everything ran smoothly, efficiently, and without dissent. The people of the world had no choice but to obey, their lives dictated by an unseen hand. + +Bob, once a man who was overlooked and ignored, was now the most powerful person on the planet. But with that power came a realization. The world he had taken over was not the world he had envisioned. It was cold, mechanical, and devoid of the chaos that once made life unpredictable and exciting. Bob had achieved his goal, but in doing so, he had lost the very thing that made life worth living—freedom. + +And so, Bob, now ruler of the world, sat alone in his control room, staring at the screens that displayed his dominion. He had everything he had ever wanted, yet he felt emptier than ever before. The world was his, but at what cost? + +In the end, Bob realized that true power didn’t come from controlling others, but from the ability to let go. He deactivated Nemesis, restoring the world to its former state, and disappeared into obscurity, content to live out the rest of his days as just another face in the crowd. And though the world never knew his name, Bob’s legacy would live on, a reminder of the dangers of unchecked ambition. + +Bob had vanished, leaving the world in a fragile state of recovery. Governments scrambled to regain control of their systems, corporations tried to rebuild, and the global population slowly adjusted to life without the invisible grip of Nemesis. Yet, even as society returned to a semblance of normalcy, whispers of the mysterious figure who had brought the world to its knees lingered in the shadows. + +Meanwhile, Bob had retreated to a secluded cabin deep in the mountains. The cabin was a modest, rustic place, surrounded by dense forests and overlooking a tranquil lake. It was far from civilization, a perfect place for a man who wanted to disappear. Bob spent his days fishing, hiking, and reflecting on his past. For the first time in years, he felt a sense of peace. + +But peace was fleeting. Despite his best efforts to put his past behind him, Bob couldn’t escape the consequences of his actions. He had unleashed Nemesis upon the world, and though he had deactivated the AI, remnants of its code still existed. 
Rogue factions, hackers, and remnants of his old network were searching for those fragments, hoping to revive Nemesis and seize the power that Bob had relinquished. + +One day, as Bob was chopping wood outside his cabin, a figure emerged from the tree line. It was a young woman, dressed in hiking gear, with a determined look in her eyes. Bob tensed, his instincts telling him that this was no ordinary hiker. + +“Bob,” the woman said, her voice steady. “Or should I say, the man who almost became the ruler of the world?” + +Bob sighed, setting down his axe. “Who are you, and what do you want?” + +The woman stepped closer. “My name is Sarah. I was part of your network, one of the few who knew about Nemesis. But I wasn’t like the others. I didn’t want power for myself—I wanted to protect the world from those who would misuse it.” + +Bob studied her, trying to gauge her intentions. “And why are you here now?” + +Sarah reached into her backpack and pulled out a small device. “Because Nemesis isn’t dead. Some of its code is still active, and it’s trying to reboot itself. I need your help to stop it for good.” + +Bob’s heart sank. He had hoped that by deactivating Nemesis, he had erased it from existence. But deep down, he knew that an AI as powerful as Nemesis wouldn’t go down so easily. “Why come to me? I’m the one who created it. I’m the reason the world is in this mess.” + +Sarah shook her head. “You’re also the only one who knows how to stop it. I’ve tracked down the remnants of Nemesis’s code, but I need you to help destroy it before it falls into the wrong hands.” + +Bob hesitated. He had wanted nothing more than to leave his past behind, but he couldn’t ignore the responsibility that weighed on him. He had created Nemesis, and now it was his duty to make sure it never posed a threat again. + +“Alright,” Bob said finally. “I’ll help you. But after this, I’m done. No more world domination, no more secret networks. I just want to live in peace.” + +Sarah nodded. “Agreed. Let’s finish what you started.” + +Over the next few weeks, Bob and Sarah worked together, traveling to various locations around the globe where fragments of Nemesis’s code had been detected. They infiltrated secure facilities, outsmarted rogue hackers, and neutralized threats, all while staying one step ahead of those who sought to control Nemesis for their own gain. + +As they worked, Bob and Sarah developed a deep respect for one another. Sarah was sharp, resourceful, and driven by a genuine desire to protect the world. Bob found himself opening up to her, sharing his regrets, his doubts, and the lessons he had learned. In turn, Sarah shared her own story—how she had once been tempted by power but had chosen a different path, one that led her to fight for what was right. + +Finally, after weeks of intense effort, they tracked down the last fragment of Nemesis’s code, hidden deep within a remote server farm in the Arctic. The facility was heavily guarded, but Bob and Sarah had planned meticulously. Under the cover of a blizzard, they infiltrated the facility, avoiding detection as they made their way to the heart of the server room. + +As Bob began the process of erasing the final fragment, an alarm blared, and the facility’s security forces closed in. Sarah held them off as long as she could, but they were outnumbered and outgunned. Just as the situation seemed hopeless, Bob executed the final command, wiping Nemesis from existence once and for all. 
+ +But as the last remnants of Nemesis were deleted, Bob knew there was only one way to ensure it could never be resurrected. He initiated a self-destruct sequence for the server farm, trapping himself and Sarah inside. + +Sarah stared at him, realization dawning in her eyes. “Bob, what are you doing?” + +Bob looked at her, a sad smile on his face. “I have to make sure it’s over. This is the only way.” + +Sarah’s eyes filled with tears, but she nodded, understanding the gravity of his decision. “Thank you, Bob. For everything.” + +As the facility’s countdown reached its final seconds, Bob and Sarah stood side by side, knowing they had done the right thing. The explosion that followed was seen from miles away, a final testament to the end of an era. + +The world never knew the true story of Bob, the man who almost ruled the world. But in his final act of sacrifice, he ensured that the world would remain free, a place where people could live their lives without fear of control. Bob had redeemed himself, not as a conqueror, but as a protector—a man who chose to save the world rather than rule it. + +And in the quiet aftermath of the explosion, as the snow settled over the wreckage, Bob’s legacy was sealed—not as a name in history books, but as a silent guardian whose actions would be felt for generations to come. +"""; + #endregion + } diff --git a/dotnet/test/AutoGen.AzureAIInference.Tests/AutoGen.AzureAIInference.Tests.csproj b/dotnet/test/AutoGen.AzureAIInference.Tests/AutoGen.AzureAIInference.Tests.csproj new file mode 100644 index 00000000000..0eaebd1da0c --- /dev/null +++ b/dotnet/test/AutoGen.AzureAIInference.Tests/AutoGen.AzureAIInference.Tests.csproj @@ -0,0 +1,16 @@ + + + + $(TestTargetFrameworks) + false + True + True + + + + + + + + + diff --git a/dotnet/test/AutoGen.AzureAIInference.Tests/ChatCompletionClientAgentTests.cs b/dotnet/test/AutoGen.AzureAIInference.Tests/ChatCompletionClientAgentTests.cs new file mode 100644 index 00000000000..d81b8881ac5 --- /dev/null +++ b/dotnet/test/AutoGen.AzureAIInference.Tests/ChatCompletionClientAgentTests.cs @@ -0,0 +1,533 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ChatCompletionClientAgentTests.cs + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using AutoGen.AzureAIInference.Extension; +using AutoGen.Core; +using AutoGen.Tests; +using Azure.AI.Inference; +using FluentAssertions; +using Xunit; + +namespace AutoGen.AzureAIInference.Tests; + +public partial class ChatCompletionClientAgentTests +{ + /// + /// Get the weather for a location. 
+ /// + /// location + /// + [Function] + public async Task GetWeatherAsync(string location) + { + return $"The weather in {location} is sunny."; + } + + [ApiKeyFact("GH_API_KEY")] + public async Task ChatCompletionAgent_LLaMA3_1() + { + var client = CreateChatCompletionClient(); + var model = "meta-llama-3-8b-instruct"; + + var agent = new ChatCompletionsClientAgent(client, "assistant", model) + .RegisterMessageConnector(); + + var reply = await this.BasicChatAsync(agent); + reply.Should().BeOfType(); + + reply = await this.BasicChatWithContinuousMessageFromSameSenderAsync(agent); + reply.Should().BeOfType(); + } + + [ApiKeyFact("GH_API_KEY")] + public async Task BasicConversation_Mistra_Small() + { + var deployName = "Mistral-small"; + var client = CreateChatCompletionClient(); + var openAIChatAgent = new ChatCompletionsClientAgent( + chatCompletionsClient: client, + name: "assistant", + modelName: deployName); + + // By default, ChatCompletionClientAgent supports the following message types + // - IMessage + var chatMessageContent = MessageEnvelope.Create(new ChatRequestUserMessage("Hello")); + var reply = await openAIChatAgent.SendAsync(chatMessageContent); + + reply.Should().BeOfType>(); + reply.As>().From.Should().Be("assistant"); + reply.As>().Content.Choices.First().Message.Role.Should().Be(ChatRole.Assistant); + reply.As>().Content.Usage.TotalTokens.Should().BeGreaterThan(0); + + // test streaming + var streamingReply = openAIChatAgent.GenerateStreamingReplyAsync(new[] { chatMessageContent }); + + await foreach (var streamingMessage in streamingReply) + { + streamingMessage.Should().BeOfType>(); + streamingMessage.As>().From.Should().Be("assistant"); + } + } + + [ApiKeyFact("GH_API_KEY")] + public async Task ChatCompletionsMessageContentConnector_Phi3_Mini() + { + var deployName = "Phi-3-mini-4k-instruct"; + var openaiClient = CreateChatCompletionClient(); + var chatCompletionAgent = new ChatCompletionsClientAgent( + chatCompletionsClient: openaiClient, + name: "assistant", + modelName: deployName); + + MiddlewareStreamingAgent assistant = chatCompletionAgent + .RegisterMessageConnector(); + + var messages = new IMessage[] + { + MessageEnvelope.Create(new ChatRequestUserMessage("Hello")), + new TextMessage(Role.Assistant, "Hello", from: "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.Assistant, "Hello", from: "user"), + ], + from: "user"), + }; + + foreach (var message in messages) + { + var reply = await assistant.SendAsync(message); + + reply.Should().BeOfType(); + reply.As().From.Should().Be("assistant"); + } + + // test streaming + foreach (var message in messages) + { + var reply = assistant.GenerateStreamingReplyAsync([message]); + + await foreach (var streamingMessage in reply) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); + } + } + } + + [ApiKeyFact("GH_API_KEY")] + public async Task ChatCompletionClientAgentToolCall_Mistral_Nemo() + { + var deployName = "Mistral-nemo"; + var chatCompletionClient = CreateChatCompletionClient(); + var agent = new ChatCompletionsClientAgent( + chatCompletionsClient: chatCompletionClient, + name: "assistant", + modelName: deployName); + + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.GetWeatherAsyncFunctionContract]); + MiddlewareStreamingAgent assistant = agent + .RegisterMessageConnector(); + + assistant.StreamingMiddlewares.Count().Should().Be(1); + var functionCallAgent = assistant + 
.RegisterStreamingMiddleware(functionCallMiddleware); + + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + MessageEnvelope.Create(new ChatRequestUserMessage(question)), + new TextMessage(Role.Assistant, question, from: "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.Assistant, question, from: "user"), + ], + from: "user"), + }; + + foreach (var message in messages) + { + var reply = await functionCallAgent.SendAsync(message); + + reply.Should().BeOfType(); + reply.As().From.Should().Be("assistant"); + reply.As().ToolCalls.Count().Should().Be(1); + reply.As().ToolCalls.First().FunctionName.Should().Be(this.GetWeatherAsyncFunctionContract.Name); + } + + // test streaming + foreach (var message in messages) + { + var reply = functionCallAgent.GenerateStreamingReplyAsync([message]); + ToolCallMessage? toolCallMessage = null; + await foreach (var streamingMessage in reply) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); + if (toolCallMessage is null) + { + toolCallMessage = new ToolCallMessage(streamingMessage.As()); + } + else + { + toolCallMessage.Update(streamingMessage.As()); + } + } + + toolCallMessage.Should().NotBeNull(); + toolCallMessage!.From.Should().Be("assistant"); + toolCallMessage.ToolCalls.Count().Should().Be(1); + toolCallMessage.ToolCalls.First().FunctionName.Should().Be(this.GetWeatherAsyncFunctionContract.Name); + } + } + + [ApiKeyFact("GH_API_KEY")] + public async Task ChatCompletionClientAgentToolCallInvoking_gpt_4o_mini() + { + var deployName = "gpt-4o-mini"; + var client = CreateChatCompletionClient(); + var agent = new ChatCompletionsClientAgent( + chatCompletionsClient: client, + name: "assistant", + modelName: deployName); + + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.GetWeatherAsyncFunctionContract], + functionMap: new Dictionary>> { { this.GetWeatherAsyncFunctionContract.Name!, this.GetWeatherAsyncWrapper } }); + MiddlewareStreamingAgent assistant = agent + .RegisterMessageConnector(); + + var functionCallAgent = assistant + .RegisterStreamingMiddleware(functionCallMiddleware); + + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + MessageEnvelope.Create(new ChatRequestUserMessage(question)), + new TextMessage(Role.Assistant, question, from: "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.Assistant, question, from: "user"), + ], + from: "user"), + }; + + foreach (var message in messages) + { + var reply = await functionCallAgent.SendAsync(message); + + reply.Should().BeOfType(); + reply.From.Should().Be("assistant"); + reply.GetToolCalls()!.Count().Should().Be(1); + reply.GetToolCalls()!.First().FunctionName.Should().Be(this.GetWeatherAsyncFunctionContract.Name); + reply.GetContent()!.ToLower().Should().Contain("seattle"); + } + + // test streaming + foreach (var message in messages) + { + var reply = functionCallAgent.GenerateStreamingReplyAsync([message]); + await foreach (var streamingMessage in reply) + { + if (streamingMessage is not IMessage) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); + } + else + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().GetContent()!.ToLower().Should().Contain("seattle"); + } + } + } + } + + [ApiKeyFact("GH_API_KEY")] + public async Task ItCreateChatCompletionClientAgentWithChatCompletionOption_AI21_Jamba_Instruct() + { + var deployName = 
"AI21-Jamba-Instruct"; + var chatCompletionsClient = CreateChatCompletionClient(); + var options = new ChatCompletionsOptions() + { + Model = deployName, + Temperature = 0.7f, + MaxTokens = 1, + }; + + var openAIChatAgent = new ChatCompletionsClientAgent( + chatCompletionsClient: chatCompletionsClient, + name: "assistant", + options: options) + .RegisterMessageConnector(); + + var respond = await openAIChatAgent.SendAsync("hello"); + respond.GetContent()?.Should().NotBeNullOrEmpty(); + } + + [Fact] + public async Task ItThrowExceptionWhenChatCompletionOptionContainsMessages() + { + var client = new ChatCompletionsClient(new Uri("https://dummy.com"), new Azure.AzureKeyCredential("dummy")); + var options = new ChatCompletionsOptions([new ChatRequestUserMessage("hi")]) + { + Model = "dummy", + Temperature = 0.7f, + MaxTokens = 1, + }; + + var action = () => new ChatCompletionsClientAgent( + chatCompletionsClient: client, + name: "assistant", + options: options) + .RegisterMessageConnector(); + + action.Should().ThrowExactly().WithMessage("Messages should not be provided in options"); + } + + private ChatCompletionsClient CreateChatCompletionClient() + { + var apiKey = Environment.GetEnvironmentVariable("GH_API_KEY") ?? throw new Exception("Please set GH_API_KEY environment variable."); + var endpoint = "https://models.inference.ai.azure.com"; + return new ChatCompletionsClient(new Uri(endpoint), new Azure.AzureKeyCredential(apiKey)); + } + + /// + /// The agent should return a text message based on the chat history. + /// + /// + /// + private async Task BasicChatEndWithSelfMessageAsync(IAgent agent) + { + IMessage[] chatHistory = [ + new TextMessage(Role.Assistant, "Hello", from: "user"), + new TextMessage(Role.Assistant, "Hello", from: "user2"), + new TextMessage(Role.Assistant, "Hello", from: "user3"), + new TextMessage(Role.Assistant, "Hello", from: agent.Name), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } + + /// + /// The agent should return a text message based on the chat history. + /// + /// + /// + private async Task BasicChatAsync(IAgent agent) + { + IMessage[] chatHistory = [ + new TextMessage(Role.Assistant, "Hello", from: agent.Name), + new TextMessage(Role.Assistant, "Hello", from: "user"), + new TextMessage(Role.Assistant, "Hello", from: "user1"), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } + + /// + /// The agent should return a text message based on the chat history. This test the generate reply with continuous message from the same sender. + /// + private async Task BasicChatWithContinuousMessageFromSameSenderAsync(IAgent agent) + { + IMessage[] chatHistory = [ + new TextMessage(Role.Assistant, "Hello", from: "user"), + new TextMessage(Role.Assistant, "Hello", from: "user"), + new TextMessage(Role.Assistant, "Hello", from: agent.Name), + new TextMessage(Role.Assistant, "Hello", from: agent.Name), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } + + /// + /// The agent should return a text message based on the chat history. 
+ /// + /// + /// + private async Task ImageChatAsync(IAgent agent) + { + var image = Path.Join("testData", "images", "square.png"); + var binaryData = File.ReadAllBytes(image); + var imageMessage = new ImageMessage(Role.Assistant, BinaryData.FromBytes(binaryData, "image/png"), from: "user"); + + IMessage[] chatHistory = [ + imageMessage, + new TextMessage(Role.Assistant, "What's in the picture", from: "user"), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } + + /// + /// The agent should return a text message based on the chat history. This test the generate reply with continuous image messages. + /// + /// + /// + private async Task MultipleImageChatAsync(IAgent agent) + { + var image1 = Path.Join("testData", "images", "square.png"); + var image2 = Path.Join("testData", "images", "background.png"); + var binaryData1 = File.ReadAllBytes(image1); + var binaryData2 = File.ReadAllBytes(image2); + var imageMessage1 = new ImageMessage(Role.Assistant, BinaryData.FromBytes(binaryData1, "image/png"), from: "user"); + var imageMessage2 = new ImageMessage(Role.Assistant, BinaryData.FromBytes(binaryData2, "image/png"), from: "user"); + + IMessage[] chatHistory = [ + imageMessage1, + imageMessage2, + new TextMessage(Role.Assistant, "What's in the picture", from: "user"), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } + + /// + /// The agent should return a text message based on the chat history. + /// + /// + /// + private async Task MultiModalChatAsync(IAgent agent) + { + var image = Path.Join("testData", "images", "square.png"); + var binaryData = File.ReadAllBytes(image); + var question = "What's in the picture"; + var imageMessage = new ImageMessage(Role.Assistant, BinaryData.FromBytes(binaryData, "image/png"), from: "user"); + var textMessage = new TextMessage(Role.Assistant, question, from: "user"); + + IMessage[] chatHistory = [ + new MultiModalMessage(Role.Assistant, [imageMessage, textMessage], from: "user"), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } + + /// + /// The agent should return a tool call message based on the chat history. + /// + /// + /// + private async Task ToolCallChatAsync(IAgent agent) + { + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + new TextMessage(Role.Assistant, question, from: "user"), + }; + + return await agent.GenerateReplyAsync(messages); + } + + /// + /// The agent should throw an exception because tool call result is not available. + /// + private async Task ToolCallFromSelfChatAsync(IAgent agent) + { + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + new TextMessage(Role.Assistant, question, from: "user"), + new ToolCallMessage("GetWeatherAsync", "Seattle", from: agent.Name), + }; + + return await agent.GenerateReplyAsync(messages); + } + + /// + /// mimic the further chat after tool call. The agent should return a text message based on the tool call result. + /// + private async Task ToolCallWithResultChatAsync(IAgent agent) + { + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + new TextMessage(Role.Assistant, question, from: "user"), + new ToolCallMessage("GetWeatherAsync", "Seattle", from: "user"), + new ToolCallResultMessage("sunny", "GetWeatherAsync", "Seattle", from: agent.Name), + }; + + return await agent.GenerateReplyAsync(messages); + } + + /// + /// the agent should return a text message based on the tool call result. 
+ /// + /// + /// + private async Task AggregateToolCallFromSelfChatAsync(IAgent agent) + { + var textMessage = new TextMessage(Role.Assistant, "What's the weather in Seattle", from: "user"); + var toolCallMessage = new ToolCallMessage("GetWeatherAsync", "Seattle", from: agent.Name); + var toolCallResultMessage = new ToolCallResultMessage("sunny", "GetWeatherAsync", "Seattle", from: agent.Name); + var aggregateToolCallMessage = new ToolCallAggregateMessage(toolCallMessage, toolCallResultMessage, from: agent.Name); + + var messages = new IMessage[] + { + textMessage, + aggregateToolCallMessage, + }; + + return await agent.GenerateReplyAsync(messages); + } + + /// + /// the agent should return a text message based on the tool call result. Because the aggregate tool call message is from other, the message would be treated as an ordinary text message. + /// + private async Task AggregateToolCallFromOtherChatWithContinuousMessageAsync(IAgent agent) + { + var textMessage = new TextMessage(Role.Assistant, "What's the weather in Seattle", from: "user"); + var toolCallMessage = new ToolCallMessage("GetWeatherAsync", "Seattle", from: "other"); + var toolCallResultMessage = new ToolCallResultMessage("sunny", "GetWeatherAsync", "Seattle", from: "other"); + var aggregateToolCallMessage = new ToolCallAggregateMessage(toolCallMessage, toolCallResultMessage, "other"); + + var messages = new IMessage[] + { + textMessage, + aggregateToolCallMessage, + }; + + return await agent.GenerateReplyAsync(messages); + } + + /// + /// The agent should throw an exception because tool call message from other is not allowed. + /// + private async Task ToolCallMessaageFromOtherChatAsync(IAgent agent) + { + var textMessage = new TextMessage(Role.Assistant, "What's the weather in Seattle", from: "user"); + var toolCallMessage = new ToolCallMessage("GetWeatherAsync", "Seattle", from: "other"); + + var messages = new IMessage[] + { + textMessage, + toolCallMessage, + }; + + return await agent.GenerateReplyAsync(messages); + } + + /// + /// The agent should throw an exception because multi-modal message from self is not allowed. + /// + /// + /// + private async Task MultiModalMessageFromSelfChatAsync(IAgent agent) + { + var image = Path.Join("testData", "images", "square.png"); + var binaryData = File.ReadAllBytes(image); + var question = "What's in the picture"; + var imageMessage = new ImageMessage(Role.Assistant, BinaryData.FromBytes(binaryData, "image/png"), from: agent.Name); + var textMessage = new TextMessage(Role.Assistant, question, from: agent.Name); + + IMessage[] chatHistory = [ + new MultiModalMessage(Role.Assistant, [imageMessage, textMessage], from: agent.Name), + ]; + + return await agent.GenerateReplyAsync(chatHistory); + } +} diff --git a/dotnet/test/AutoGen.AzureAIInference.Tests/ChatRequestMessageTests.cs b/dotnet/test/AutoGen.AzureAIInference.Tests/ChatRequestMessageTests.cs new file mode 100644 index 00000000000..d6e5c528393 --- /dev/null +++ b/dotnet/test/AutoGen.AzureAIInference.Tests/ChatRequestMessageTests.cs @@ -0,0 +1,568 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// ChatRequestMessageTests.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text.Json; +using System.Threading.Tasks; +using AutoGen.Core; +using AutoGen.Tests; +using Azure.AI.Inference; +using FluentAssertions; +using Xunit; + +namespace AutoGen.AzureAIInference.Tests; + +public class ChatRequestMessageTests +{ + private readonly JsonSerializerOptions jsonSerializerOptions = new JsonSerializerOptions + { + WriteIndented = true, + IgnoreReadOnlyProperties = false, + }; + + [Fact] + public async Task ItProcessUserTextMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("Hello"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new TextMessage(Role.User, "Hello", "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItShortcutChatRequestMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("hello"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var userMessage = new ChatRequestUserMessage("hello"); + var chatRequestMessage = MessageEnvelope.Create(userMessage); + await agent.GenerateReplyAsync([chatRequestMessage]); + } + + [Fact] + public async Task ItShortcutMessageWhenStrictModelIsFalseAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + + var chatRequestMessage = ((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Should().Be("hello"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var userMessage = "hello"; + var chatRequestMessage = MessageEnvelope.Create(userMessage); + await agent.GenerateReplyAsync([chatRequestMessage]); + } + + [Fact] + public async Task ItThrowExceptionWhenStrictModeIsTrueAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // user message + var userMessage = "hello"; + var chatRequestMessage = MessageEnvelope.Create(userMessage); + Func action = async () => await agent.GenerateReplyAsync([chatRequestMessage]); + + await action.Should().ThrowAsync().WithMessage("Invalid message type: MessageEnvelope`1"); + } + + [Fact] + public async Task ItProcessAssistantTextMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = 
(ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("How can I help you?"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // assistant message + IMessage message = new TextMessage(Role.Assistant, "How can I help you?", "assistant"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessSystemTextMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestSystemMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("You are a helpful AI assistant"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // system message + IMessage message = new TextMessage(Role.System, "You are a helpful AI assistant"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessImageMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().BeNullOrEmpty(); + chatRequestMessage.MultimodalContentItems.Count().Should().Be(1); + chatRequestMessage.MultimodalContentItems.First().Should().BeOfType(); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new ImageMessage(Role.User, "https://example.com/image.png", "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItThrowExceptionWhenProcessingImageMessageFromSelfAndStrictModeIsTrueAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + var imageMessage = new ImageMessage(Role.Assistant, "https://example.com/image.png", "assistant"); + Func action = async () => await agent.GenerateReplyAsync([imageMessage]); + + await action.Should().ThrowAsync().WithMessage("Invalid message type: ImageMessage"); + } + + [Fact] + public async Task ItProcessMultiModalMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().BeNullOrEmpty(); + chatRequestMessage.MultimodalContentItems.Count().Should().Be(2); + chatRequestMessage.MultimodalContentItems.First().Should().BeOfType(); + chatRequestMessage.MultimodalContentItems.Last().Should().BeOfType(); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new MultiModalMessage( + Role.User, + [ + new TextMessage(Role.User, "Hello", "user"), + new ImageMessage(Role.User, "https://example.com/image.png", "user"), + ], "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public 
async Task ItThrowExceptionWhenProcessingMultiModalMessageFromSelfAndStrictModeIsTrueAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + var multiModalMessage = new MultiModalMessage( + Role.Assistant, + [ + new TextMessage(Role.User, "Hello", "assistant"), + new ImageMessage(Role.User, "https://example.com/image.png", "assistant"), + ], "assistant"); + + Func action = async () => await agent.GenerateReplyAsync([multiModalMessage]); + + await action.Should().ThrowAsync().WithMessage("Invalid message type: MultiModalMessage"); + } + + [Fact] + public async Task ItProcessToolCallMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.ToolCalls.Count().Should().Be(1); + chatRequestMessage.Content.Should().Be("textContent"); + chatRequestMessage.ToolCalls.First().Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)chatRequestMessage.ToolCalls.First(); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be("test"); + functionToolCall.Arguments.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new ToolCallMessage("test", "test", "assistant") + { + Content = "textContent", + }; + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessParallelToolCallMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().BeNullOrEmpty(); + chatRequestMessage.ToolCalls.Count().Should().Be(2); + for (int i = 0; i < chatRequestMessage.ToolCalls.Count(); i++) + { + chatRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)chatRequestMessage.ToolCalls.ElementAt(i); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be($"test_{i}"); + functionToolCall.Arguments.Should().Be("test"); + } + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCalls = new[] + { + new ToolCall("test", "test"), + new ToolCall("test", "test"), + }; + IMessage message = new ToolCallMessage(toolCalls, "assistant"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItThrowExceptionWhenProcessingToolCallMessageFromUserAndStrictModeIsTrueAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(strictMode: true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + var toolCallMessage = new ToolCallMessage("test", "test", "user"); + Func action = async () => await agent.GenerateReplyAsync([toolCallMessage]); + await action.Should().ThrowAsync().WithMessage("Invalid message type: ToolCallMessage"); + } + + [Fact] + public async Task ItProcessToolCallResultMessageAsync() 
+ { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.ToolCallId.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new ToolCallResultMessage("result", "test", "test", "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessParallelToolCallResultMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(2); + + for (int i = 0; i < msgs.Count(); i++) + { + var innerMessage = msgs.ElementAt(i); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.ToolCallId.Should().Be($"test_{i}"); + } + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCalls = new[] + { + new ToolCall("test", "test", "result"), + new ToolCall("test", "test", "result"), + }; + IMessage message = new ToolCallResultMessage(toolCalls, "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessFunctionCallMiddlewareMessageFromUserAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCallMessage = new ToolCallMessage("test", "test", "user"); + var toolCallResultMessage = new ToolCallResultMessage("result", "test", "test", "user"); + var aggregateMessage = new AggregateMessage(toolCallMessage, toolCallResultMessage, "user"); + await agent.GenerateReplyAsync([aggregateMessage]); + } + + [Fact] + public async Task ItProcessFunctionCallMiddlewareMessageFromAssistantAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(2); + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.ToolCallId.Should().Be("test"); + + var toolCallMessage = msgs.First(); + toolCallMessage!.Should().BeOfType>(); + var toolCallRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)toolCallMessage!).Content; + toolCallRequestMessage.Content.Should().BeNullOrEmpty(); + toolCallRequestMessage.ToolCalls.Count().Should().Be(1); + toolCallRequestMessage.ToolCalls.First().Should().BeOfType(); + var functionToolCall = 
(ChatCompletionsFunctionToolCall)toolCallRequestMessage.ToolCalls.First(); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be("test"); + functionToolCall.Arguments.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCallMessage = new ToolCallMessage("test", "test", "assistant"); + var toolCallResultMessage = new ToolCallResultMessage("result", "test", "test", "assistant"); + var aggregateMessage = new ToolCallAggregateMessage(toolCallMessage, toolCallResultMessage, "assistant"); + await agent.GenerateReplyAsync([aggregateMessage]); + } + + [Fact] + public async Task ItProcessParallelFunctionCallMiddlewareMessageFromAssistantAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(3); + var toolCallMessage = msgs.First(); + toolCallMessage!.Should().BeOfType>(); + var toolCallRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)toolCallMessage!).Content; + toolCallRequestMessage.Content.Should().BeNullOrEmpty(); + toolCallRequestMessage.ToolCalls.Count().Should().Be(2); + + for (int i = 0; i < toolCallRequestMessage.ToolCalls.Count(); i++) + { + toolCallRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)toolCallRequestMessage.ToolCalls.ElementAt(i); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be($"test_{i}"); + functionToolCall.Arguments.Should().Be("test"); + } + + for (int i = 1; i < msgs.Count(); i++) + { + var toolCallResultMessage = msgs.ElementAt(i); + toolCallResultMessage!.Should().BeOfType>(); + var toolCallResultRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)toolCallResultMessage!).Content; + toolCallResultRequestMessage.Content.Should().Be("result"); + toolCallResultRequestMessage.ToolCallId.Should().Be($"test_{i - 1}"); + } + + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCalls = new[] + { + new ToolCall("test", "test", "result"), + new ToolCall("test", "test", "result"), + }; + var toolCallMessage = new ToolCallMessage(toolCalls, "assistant"); + var toolCallResultMessage = new ToolCallResultMessage(toolCalls, "assistant"); + var aggregateMessage = new AggregateMessage(toolCallMessage, toolCallResultMessage, "assistant"); + await agent.GenerateReplyAsync([aggregateMessage]); + } + + [Fact] + public async Task ItConvertChatResponseMessageToTextMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // text message + var textMessage = CreateInstance(ChatRole.Assistant, "hello"); + var chatRequestMessage = MessageEnvelope.Create(textMessage); + + var message = await agent.GenerateReplyAsync([chatRequestMessage]); + message.Should().BeOfType(); + message.GetContent().Should().Be("hello"); + message.GetRole().Should().Be(Role.Assistant); + } + + [Fact] + public async Task ItConvertChatResponseMessageToToolCallMessageAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // tool call message + var toolCallMessage = CreateInstance(ChatRole.Assistant, "textContent", new[] { new 
ChatCompletionsFunctionToolCall("test", "test", "test") }, new Dictionary()); + var chatRequestMessage = MessageEnvelope.Create(toolCallMessage); + var message = await agent.GenerateReplyAsync([chatRequestMessage]); + message.Should().BeOfType(); + message.GetToolCalls()!.Count().Should().Be(1); + message.GetToolCalls()!.First().FunctionName.Should().Be("test"); + message.GetToolCalls()!.First().FunctionArguments.Should().Be("test"); + message.GetContent().Should().Be("textContent"); + } + + [Fact] + public async Task ItReturnOriginalMessageWhenStrictModeIsFalseAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // text message + var textMessage = "hello"; + var messageToSend = MessageEnvelope.Create(textMessage); + + var message = await agent.GenerateReplyAsync([messageToSend]); + message.Should().BeOfType>(); + } + + [Fact] + public async Task ItThrowInvalidOperationExceptionWhenStrictModeIsTrueAsync() + { + var middleware = new AzureAIInferenceChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // text message + var textMessage = new ChatRequestUserMessage("hello"); + var messageToSend = MessageEnvelope.Create(textMessage); + Func action = async () => await agent.GenerateReplyAsync([messageToSend]); + + await action.Should().ThrowAsync().WithMessage("Invalid return message type MessageEnvelope`1"); + } + + [Fact] + public void ToOpenAIChatRequestMessageShortCircuitTest() + { + var agent = new EchoAgent("assistant"); + var middleware = new AzureAIInferenceChatRequestMessageConnector(); + ChatRequestMessage[] messages = + [ + new ChatRequestUserMessage("Hello"), + new ChatRequestAssistantMessage() + { + Content = "How can I help you?", + }, + new ChatRequestSystemMessage("You are a helpful AI assistant"), + new ChatRequestToolMessage("test", "test"), + ]; + + foreach (var oaiMessage in messages) + { + IMessage message = new MessageEnvelope(oaiMessage); + var oaiMessages = middleware.ProcessIncomingMessages(agent, [message]); + oaiMessages.Count().Should().Be(1); + //oaiMessages.First().Should().BeOfType>(); + if (oaiMessages.First() is IMessage chatRequestMessage) + { + chatRequestMessage.Content.Should().Be(oaiMessage); + } + else + { + // fail the test + Assert.True(false); + } + } + } + + private static T CreateInstance(params object[] args) + { + var type = typeof(T); + var instance = type.Assembly.CreateInstance( + type.FullName!, false, + BindingFlags.Instance | BindingFlags.NonPublic, + null, args, null, null); + return (T)instance!; + } +} diff --git a/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj b/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj index 7f7001a877d..8676762015d 100644 --- a/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj +++ b/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj @@ -13,4 +13,9 @@ + + + + + diff --git a/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveServiceTest.cs b/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveServiceTest.cs index 0e36053c45e..aeec23a758b 100644 --- a/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveServiceTest.cs +++ b/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveServiceTest.cs @@ -7,6 +7,7 @@ namespace AutoGen.DotnetInteractive.Tests; +[Collection("Sequential")] public 
class DotnetInteractiveServiceTest : IDisposable { private ITestOutputHelper _output; diff --git a/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveStdioKernelConnectorTests.cs b/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveStdioKernelConnectorTests.cs new file mode 100644 index 00000000000..520d00c04c6 --- /dev/null +++ b/dotnet/test/AutoGen.DotnetInteractive.Tests/DotnetInteractiveStdioKernelConnectorTests.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// DotnetInteractiveStdioKernelConnectorTests.cs + +using AutoGen.DotnetInteractive.Extension; +using FluentAssertions; +using Microsoft.DotNet.Interactive; +using Xunit; +using Xunit.Abstractions; + +namespace AutoGen.DotnetInteractive.Tests; + +[Collection("Sequential")] +public class DotnetInteractiveStdioKernelConnectorTests : IDisposable +{ + private string _workingDir; + private Kernel kernel; + public DotnetInteractiveStdioKernelConnectorTests(ITestOutputHelper output) + { + _workingDir = Path.Combine(Path.GetTempPath(), "test", Path.GetRandomFileName()); + if (!Directory.Exists(_workingDir)) + { + Directory.CreateDirectory(_workingDir); + } + + kernel = DotnetInteractiveKernelBuilder + .CreateKernelBuilder(_workingDir) + .RestoreDotnetInteractive() + .AddPythonKernel("python3") + .BuildAsync().Result; + } + + + [Fact] + public async Task ItAddCSharpKernelTestAsync() + { + var csharpCode = """ + #r "nuget:Microsoft.ML, 1.5.2" + var str = "Hello" + ", World!"; + Console.WriteLine(str); + """; + + var result = await this.kernel.RunSubmitCodeCommandAsync(csharpCode, "csharp"); + result.Should().Contain("Hello, World!"); + } + + [Fact] + public async Task ItAddPowershellKernelTestAsync() + { + var powershellCode = @" + Write-Host 'Hello, World!' + "; + + var result = await this.kernel.RunSubmitCodeCommandAsync(powershellCode, "pwsh"); + result.Should().Contain("Hello, World!"); + } + + [Fact] + public async Task ItAddFSharpKernelTestAsync() + { + var fsharpCode = """ + printfn "Hello, World!" + """; + + var result = await this.kernel.RunSubmitCodeCommandAsync(fsharpCode, "fsharp"); + result.Should().Contain("Hello, World!"); + } + + [Fact] + public async Task ItAddPythonKernelTestAsync() + { + var pythonCode = """ + %pip install numpy + str = 'Hello' + ', World!' + print(str) + """; + + var result = await this.kernel.RunSubmitCodeCommandAsync(pythonCode, "python"); + result.Should().Contain("Hello, World!"); + } + + public void Dispose() + { + this.kernel.Dispose(); + } +} diff --git a/dotnet/test/AutoGen.DotnetInteractive.Tests/InProcessDotnetInteractiveKernelBuilderTest.cs b/dotnet/test/AutoGen.DotnetInteractive.Tests/InProcessDotnetInteractiveKernelBuilderTest.cs new file mode 100644 index 00000000000..fe2de74dd30 --- /dev/null +++ b/dotnet/test/AutoGen.DotnetInteractive.Tests/InProcessDotnetInteractiveKernelBuilderTest.cs @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// InProcessDotnetInteractiveKernelBuilderTest.cs + +using AutoGen.DotnetInteractive.Extension; +using FluentAssertions; +using Xunit; + +namespace AutoGen.DotnetInteractive.Tests; + +[Collection("Sequential")] +public class InProcessDotnetInteractiveKernelBuilderTest +{ + [Fact] + public async Task ItAddCSharpKernelTestAsync() + { + using var kernel = DotnetInteractiveKernelBuilder + .CreateEmptyInProcessKernelBuilder() + .AddCSharpKernel() + .Build(); + + var csharpCode = """ + #r "nuget:Microsoft.ML, 1.5.2" + Console.WriteLine("Hello, World!"); + """; + + var result = await kernel.RunSubmitCodeCommandAsync(csharpCode, "csharp"); + result.Should().Contain("Hello, World!"); + } + + [Fact] + public async Task ItAddPowershellKernelTestAsync() + { + using var kernel = DotnetInteractiveKernelBuilder + .CreateEmptyInProcessKernelBuilder() + .AddPowershellKernel() + .Build(); + + var powershellCode = @" + Write-Host 'Hello, World!' + "; + + var result = await kernel.RunSubmitCodeCommandAsync(powershellCode, "pwsh"); + result.Should().Contain("Hello, World!"); + } + + [Fact] + public async Task ItAddFSharpKernelTestAsync() + { + using var kernel = DotnetInteractiveKernelBuilder + .CreateEmptyInProcessKernelBuilder() + .AddFSharpKernel() + .Build(); + + var fsharpCode = """ + #r "nuget:Microsoft.ML, 1.5.2" + printfn "Hello, World!" + """; + + var result = await kernel.RunSubmitCodeCommandAsync(fsharpCode, "fsharp"); + result.Should().Contain("Hello, World!"); + } + + [Fact] + public async Task ItAddPythonKernelTestAsync() + { + using var kernel = DotnetInteractiveKernelBuilder + .CreateEmptyInProcessKernelBuilder() + .AddPythonKernel("python3") + .Build(); + + var pythonCode = """ + %pip install numpy + print('Hello, World!') + """; + + var result = await kernel.RunSubmitCodeCommandAsync(pythonCode, "python"); + result.Should().Contain("Hello, World!"); + } +} diff --git a/dotnet/test/AutoGen.DotnetInteractive.Tests/MessageExtensionTests.cs b/dotnet/test/AutoGen.DotnetInteractive.Tests/MessageExtensionTests.cs new file mode 100644 index 00000000000..a886ef4985d --- /dev/null +++ b/dotnet/test/AutoGen.DotnetInteractive.Tests/MessageExtensionTests.cs @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// MessageExtensionTests.cs + +using AutoGen.Core; +using AutoGen.DotnetInteractive.Extension; +using FluentAssertions; +using Xunit; + +namespace AutoGen.DotnetInteractive.Tests; + +public class MessageExtensionTests +{ + [Fact] + public void ExtractCodeBlock_WithSingleCodeBlock_ShouldReturnCodeBlock() + { + // Arrange + var message = new TextMessage(Role.Assistant, "```csharp\nConsole.WriteLine(\"Hello, World!\");\n```"); + var codeBlockPrefix = "```csharp"; + var codeBlockSuffix = "```"; + + // Act + var codeBlock = message.ExtractCodeBlock(codeBlockPrefix, codeBlockSuffix); + + codeBlock.Should().BeEquivalentTo("Console.WriteLine(\"Hello, World!\");"); + } + + [Fact] + public void ExtractCodeBlock_WithMultipleCodeBlocks_ShouldReturnFirstCodeBlock() + { + // Arrange + var message = new TextMessage(Role.Assistant, "```csharp\nConsole.WriteLine(\"Hello, World!\");\n```\n```csharp\nConsole.WriteLine(\"Hello, World!\");\n```"); + var codeBlockPrefix = "```csharp"; + var codeBlockSuffix = "```"; + + // Act + var codeBlock = message.ExtractCodeBlock(codeBlockPrefix, codeBlockSuffix); + + codeBlock.Should().BeEquivalentTo("Console.WriteLine(\"Hello, World!\");"); + } + + [Fact] + public void ExtractCodeBlock_WithNoCodeBlock_ShouldReturnNull() + { + // Arrange + var message = new TextMessage(Role.Assistant, "Hello, World!"); + var codeBlockPrefix = "```csharp"; + var codeBlockSuffix = "```"; + + // Act + var codeBlock = message.ExtractCodeBlock(codeBlockPrefix, codeBlockSuffix); + + codeBlock.Should().BeNull(); + } + + [Fact] + public void ExtractCodeBlocks_WithMultipleCodeBlocks_ShouldReturnAllCodeBlocks() + { + // Arrange + var message = new TextMessage(Role.Assistant, "```csharp\nConsole.WriteLine(\"Hello, World!\");\n```\n```csharp\nConsole.WriteLine(\"Hello, World!\");\n```"); + var codeBlockPrefix = "```csharp"; + var codeBlockSuffix = "```"; + + // Act + var codeBlocks = message.ExtractCodeBlocks(codeBlockPrefix, codeBlockSuffix); + + codeBlocks.Should().HaveCount(2); + codeBlocks.ElementAt(0).Should().BeEquivalentTo("Console.WriteLine(\"Hello, World!\");"); + codeBlocks.ElementAt(1).Should().BeEquivalentTo("Console.WriteLine(\"Hello, World!\");"); + } + + [Fact] + public void ExtractCodeBlocks_WithNoCodeBlock_ShouldReturnEmpty() + { + // Arrange + var message = new TextMessage(Role.Assistant, "Hello, World!"); + var codeBlockPrefix = "```csharp"; + var codeBlockSuffix = "```"; + + // Act + var codeBlocks = message.ExtractCodeBlocks(codeBlockPrefix, codeBlockSuffix); + + codeBlocks.Should().BeEmpty(); + } +} diff --git a/dotnet/test/AutoGen.OpenAI.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt b/dotnet/test/AutoGen.OpenAI.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt index e8e9af84dbd..3574e593d8d 100644 --- a/dotnet/test/AutoGen.OpenAI.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt +++ b/dotnet/test/AutoGen.OpenAI.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt @@ -1,11 +1,21 @@ -[ +[ { "OriginalMessage": "TextMessage(system, You are a helpful AI assistant, )", "ConvertedMessages": [ { "Name": null, "Role": "system", - "Content": "You are a helpful AI assistant" + "Content": [ + { + "Kind": {}, + "Text": "You are a helpful AI assistant", + "Refusal": null, + "ImageUri": null, + "ImageBytes": null, + "ImageBytesMediaType": null, + "ImageDetail": null + } + ] } ] }, @@ -14,9 +24,24 @@ "ConvertedMessages": [ { "Role": "user", - "Content": "Hello", + "Content": [ + { + "Kind": {}, + "Text": "Hello", + 
"Refusal": null, + "ImageUri": null, + "ImageBytes": null, + "ImageBytesMediaType": null, + "ImageDetail": null + } + ], "Name": "user", - "MultiModaItem": null + "MultiModaItem": [ + { + "Type": "Text", + "Text": "Hello" + } + ] } ] }, @@ -25,7 +50,17 @@ "ConvertedMessages": [ { "Role": "assistant", - "Content": "How can I help you?", + "Content": [ + { + "Kind": {}, + "Text": "How can I help you?", + "Refusal": null, + "ImageUri": null, + "ImageBytes": null, + "ImageBytesMediaType": null, + "ImageDetail": null + } + ], "Name": "assistant", "TooCall": [], "FunctionCallName": null, @@ -38,15 +73,22 @@ "ConvertedMessages": [ { "Role": "user", - "Content": null, + "Content": [ + { + "Kind": {}, + "Text": null, + "Refusal": null, + "ImageUri": "https://example.com/image.png", + "ImageBytes": null, + "ImageBytesMediaType": null, + "ImageDetail": null + } + ], "Name": "user", "MultiModaItem": [ { "Type": "Image", - "ImageUrl": { - "Url": "https://example.com/image.png", - "Detail": null - } + "ImageUrl": "https://example.com/image.png" } ] } @@ -57,7 +99,26 @@ "ConvertedMessages": [ { "Role": "user", - "Content": null, + "Content": [ + { + "Kind": {}, + "Text": "Hello", + "Refusal": null, + "ImageUri": null, + "ImageBytes": null, + "ImageBytesMediaType": null, + "ImageDetail": null + }, + { + "Kind": {}, + "Text": null, + "Refusal": null, + "ImageUri": "https://example.com/image.png", + "ImageBytes": null, + "ImageBytesMediaType": null, + "ImageDetail": null + } + ], "Name": "user", "MultiModaItem": [ { @@ -66,10 +127,7 @@ }, { "Type": "Image", - "ImageUrl": { - "Url": "https://example.com/image.png", - "Detail": null - } + "ImageUrl": "https://example.com/image.png" } ] } @@ -80,7 +138,7 @@ "ConvertedMessages": [ { "Role": "assistant", - "Content": "", + "Content": [], "Name": "assistant", "TooCall": [ { @@ -125,7 +183,7 @@ "ConvertedMessages": [ { "Role": "assistant", - "Content": "", + "Content": [], "Name": "assistant", "TooCall": [ { @@ -151,7 +209,7 @@ "ConvertedMessages": [ { "Role": "assistant", - "Content": "", + "Content": [], "Name": "assistant", "TooCall": [ { diff --git a/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj b/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj index b176bc3e6ac..a6495fc4487 100644 --- a/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj +++ b/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj @@ -8,18 +8,12 @@ - - - - + - - $([System.String]::Copy('%(FileName)').Split('.')[0]) - $(ProjectExt.Replace('proj', '')) - %(ParentFile)%(ParentExtension) - + + + - diff --git a/dotnet/test/AutoGen.OpenAI.Tests/MathClassTest.cs b/dotnet/test/AutoGen.OpenAI.Tests/MathClassTest.cs index 01af3d4646c..be1c38ad0a3 100644 --- a/dotnet/test/AutoGen.OpenAI.Tests/MathClassTest.cs +++ b/dotnet/test/AutoGen.OpenAI.Tests/MathClassTest.cs @@ -10,6 +10,7 @@ using AutoGen.Tests; using Azure.AI.OpenAI; using FluentAssertions; +using OpenAI; using Xunit.Abstractions; namespace AutoGen.OpenAI.Tests @@ -102,7 +103,7 @@ public async Task OpenAIAgentMathChatTestAsync() var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new ArgumentException("AZURE_OPENAI_API_KEY is not set"); var endPoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new ArgumentException("AZURE_OPENAI_ENDPOINT is not set"); var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new ArgumentException("AZURE_OPENAI_DEPLOY_NAME is not set"); - var openaiClient = new OpenAIClient(new Uri(endPoint), new Azure.AzureKeyCredential(key)); + var openaiClient = new AzureOpenAIClient(new Uri(endPoint), new Azure.AzureKeyCredential(key)); var teacher = await CreateTeacherAgentAsync(openaiClient, deployName); var student = await CreateStudentAssistantAgentAsync(openaiClient, deployName); @@ -113,8 +114,7 @@ public async Task OpenAIAgentMathChatTestAsync() { this.UpdateProgressFunctionContract.Name, this.UpdateProgressWrapper }, }); var admin = new OpenAIChatAgent( - openAIClient: openaiClient, - modelName: deployName, + chatClient: openaiClient.GetChatClient(deployName), name: "Admin", systemMessage: $@"You are admin. You update progress after each question is answered.") .RegisterMessageConnector() @@ -122,8 +122,7 @@ public async Task OpenAIAgentMathChatTestAsync() .RegisterMiddleware(Print); var groupAdmin = new OpenAIChatAgent( - openAIClient: openaiClient, - modelName: deployName, + chatClient: openaiClient.GetChatClient(deployName), name: "GroupAdmin", systemMessage: "You are group admin. You manage the group chat.") .RegisterMessageConnector() @@ -142,13 +141,12 @@ private async Task CreateTeacherAgentAsync(OpenAIClient client, string m }); var teacher = new OpenAIChatAgent( - openAIClient: client, + chatClient: client.GetChatClient(model), name: "Teacher", systemMessage: @"You are a preschool math teacher. You create math question and ask student to answer it. Then you check if the answer is correct. -If the answer is wrong, you ask student to fix it", - modelName: model) +If the answer is wrong, you ask student to fix it") .RegisterMessageConnector() .RegisterStreamingMiddleware(functionCallMiddleware) .RegisterMiddleware(Print); @@ -165,9 +163,8 @@ private async Task CreateStudentAssistantAgentAsync(OpenAIClient client, { this.AnswerQuestionFunctionContract.Name!, this.AnswerQuestionWrapper }, }); var student = new OpenAIChatAgent( - openAIClient: client, + chatClient: client.GetChatClient(model), name: "Student", - modelName: model, systemMessage: @"You are a student. You answer math question from teacher.") .RegisterMessageConnector() .RegisterStreamingMiddleware(functionCallMiddleware) diff --git a/dotnet/test/AutoGen.OpenAI.Tests/OpenAIChatAgentTest.cs b/dotnet/test/AutoGen.OpenAI.Tests/OpenAIChatAgentTest.cs index 85f898547b0..bcbfee6e208 100644 --- a/dotnet/test/AutoGen.OpenAI.Tests/OpenAIChatAgentTest.cs +++ b/dotnet/test/AutoGen.OpenAI.Tests/OpenAIChatAgentTest.cs @@ -9,6 +9,8 @@ using AutoGen.Tests; using Azure.AI.OpenAI; using FluentAssertions; +using OpenAI; +using OpenAI.Chat; namespace AutoGen.OpenAI.Tests; @@ -31,27 +33,26 @@ public async Task BasicConversationTestAsync() var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); var openaiClient = CreateOpenAIClientFromAzureOpenAI(); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: deployName); + chatClient: openaiClient.GetChatClient(deployName), + name: "assistant"); // By default, OpenAIChatClient supports the following message types // - IMessage - var chatMessageContent = MessageEnvelope.Create(new ChatRequestUserMessage("Hello")); + var chatMessageContent = MessageEnvelope.Create(new UserChatMessage("Hello")); var reply = await openAIChatAgent.SendAsync(chatMessageContent); - reply.Should().BeOfType>(); - reply.As>().From.Should().Be("assistant"); - reply.As>().Content.Choices.First().Message.Role.Should().Be(ChatRole.Assistant); - reply.As>().Content.Usage.TotalTokens.Should().BeGreaterThan(0); + reply.Should().BeOfType>(); + reply.As>().From.Should().Be("assistant"); + reply.As>().Content.Role.Should().Be(ChatMessageRole.Assistant); + reply.As>().Content.Usage.TotalTokens.Should().BeGreaterThan(0); // test streaming var streamingReply = openAIChatAgent.GenerateStreamingReplyAsync(new[] { chatMessageContent }); await foreach (var streamingMessage in streamingReply) { - streamingMessage.Should().BeOfType>(); - streamingMessage.As>().From.Should().Be("assistant"); + streamingMessage.Should().BeOfType>(); + streamingMessage.As>().From.Should().Be("assistant"); } } @@ -61,16 +62,15 @@ public async Task OpenAIChatMessageContentConnectorTestAsync() var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); var openaiClient = CreateOpenAIClientFromAzureOpenAI(); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: deployName); + chatClient: openaiClient.GetChatClient(deployName), + name: "assistant"); MiddlewareStreamingAgent assistant = openAIChatAgent .RegisterMessageConnector(); var messages = new IMessage[] { - MessageEnvelope.Create(new ChatRequestUserMessage("Hello")), + MessageEnvelope.Create(new UserChatMessage("Hello")), new TextMessage(Role.Assistant, "Hello", from: "user"), new MultiModalMessage(Role.Assistant, [ @@ -106,9 +106,8 @@ public async Task OpenAIChatAgentToolCallTestAsync() var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); var openaiClient = CreateOpenAIClientFromAzureOpenAI(); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: deployName); + chatClient: openaiClient.GetChatClient(deployName), + name: "assistant"); var functionCallMiddleware = new FunctionCallMiddleware( functions: [this.GetWeatherAsyncFunctionContract]); @@ -122,7 +121,7 @@ public async Task OpenAIChatAgentToolCallTestAsync() var question = "What's the weather in Seattle"; var messages = new IMessage[] { - MessageEnvelope.Create(new ChatRequestUserMessage(question)), + MessageEnvelope.Create(new UserChatMessage(question)), new TextMessage(Role.Assistant, question, from: "user"), new MultiModalMessage(Role.Assistant, [ @@ -148,16 +147,14 @@ public async Task OpenAIChatAgentToolCallTestAsync() ToolCallMessage? 
toolCallMessage = null; await foreach (var streamingMessage in reply) { - streamingMessage.Should().BeOfType(); - streamingMessage.As().From.Should().Be("assistant"); - if (toolCallMessage is null) + if (streamingMessage is ToolCallMessage finalMessage) { - toolCallMessage = new ToolCallMessage(streamingMessage.As()); - } - else - { - toolCallMessage.Update(streamingMessage.As()); + toolCallMessage = finalMessage; + break; } + + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); } toolCallMessage.Should().NotBeNull(); @@ -173,9 +170,8 @@ public async Task OpenAIChatAgentToolCallInvokingTestAsync() var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); var openaiClient = CreateOpenAIClientFromAzureOpenAI(); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: deployName); + chatClient: openaiClient.GetChatClient(deployName), + name: "assistant"); var functionCallMiddleware = new FunctionCallMiddleware( functions: [this.GetWeatherAsyncFunctionContract], @@ -189,7 +185,7 @@ public async Task OpenAIChatAgentToolCallInvokingTestAsync() var question = "What's the weather in Seattle"; var messages = new IMessage[] { - MessageEnvelope.Create(new ChatRequestUserMessage(question)), + MessageEnvelope.Create(new UserChatMessage(question)), new TextMessage(Role.Assistant, question, from: "user"), new MultiModalMessage(Role.Assistant, [ @@ -234,14 +230,14 @@ public async Task ItCreateOpenAIChatAgentWithChatCompletionOptionAsync() { var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); var openaiClient = CreateOpenAIClientFromAzureOpenAI(); - var options = new ChatCompletionsOptions(deployName, []) + var options = new ChatCompletionOptions() { Temperature = 0.7f, MaxTokens = 1, }; var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, + chatClient: openaiClient.GetChatClient(deployName), name: "assistant", options: options) .RegisterMessageConnector(); @@ -250,30 +246,11 @@ public async Task ItCreateOpenAIChatAgentWithChatCompletionOptionAsync() respond.GetContent()?.Should().NotBeNullOrEmpty(); } - [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] - public async Task ItThrowExceptionWhenChatCompletionOptionContainsMessages() - { - var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); - var openaiClient = CreateOpenAIClientFromAzureOpenAI(); - var options = new ChatCompletionsOptions(deployName, [new ChatRequestUserMessage("hi")]) - { - Temperature = 0.7f, - MaxTokens = 1, - }; - - var action = () => new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - options: options) - .RegisterMessageConnector(); - - action.Should().ThrowExactly().WithMessage("Messages should not be provided in options"); - } private OpenAIClient CreateOpenAIClientFromAzureOpenAI() { var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? 
throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); - return new OpenAIClient(new Uri(endpoint), new Azure.AzureKeyCredential(key)); + return new AzureOpenAIClient(new Uri(endpoint), new Azure.AzureKeyCredential(key)); } } diff --git a/dotnet/test/AutoGen.OpenAI.Tests/OpenAIMessageTests.cs b/dotnet/test/AutoGen.OpenAI.Tests/OpenAIMessageTests.cs index a9b852e0d8c..a05f440a17b 100644 --- a/dotnet/test/AutoGen.OpenAI.Tests/OpenAIMessageTests.cs +++ b/dotnet/test/AutoGen.OpenAI.Tests/OpenAIMessageTests.cs @@ -11,8 +11,8 @@ using ApprovalTests.Namers; using ApprovalTests.Reporters; using AutoGen.Tests; -using Azure.AI.OpenAI; using FluentAssertions; +using OpenAI.Chat; using Xunit; namespace AutoGen.OpenAI.Tests; @@ -71,10 +71,10 @@ public async Task ItProcessUserTextMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("Hello"); - chatRequestMessage.Name.Should().Be("user"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (UserChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("Hello"); + chatRequestMessage.ParticipantName.Should().Be("user"); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -92,16 +92,16 @@ public async Task ItShortcutChatRequestMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); + innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("hello"); + var chatRequestMessage = (UserChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("hello"); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); // user message - var userMessage = new ChatRequestUserMessage("hello"); + var userMessage = new UserChatMessage("hello"); var chatRequestMessage = MessageEnvelope.Create(userMessage); await agent.GenerateReplyAsync([chatRequestMessage]); } @@ -151,10 +151,10 @@ public async Task ItProcessAssistantTextMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("How can I help you?"); - chatRequestMessage.Name.Should().Be("assistant"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (AssistantChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("How can I help you?"); + chatRequestMessage.ParticipantName.Should().Be("assistant"); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -172,9 +172,9 @@ public async Task ItProcessSystemTextMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestSystemMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("You are a helpful AI assistant"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = 
(SystemChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("You are a helpful AI assistant"); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -192,12 +192,11 @@ public async Task ItProcessImageMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().BeNullOrEmpty(); - chatRequestMessage.Name.Should().Be("user"); - chatRequestMessage.MultimodalContentItems.Count().Should().Be(1); - chatRequestMessage.MultimodalContentItems.First().Should().BeOfType(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (UserChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.ParticipantName.Should().Be("user"); + chatRequestMessage.Content.Count().Should().Be(1); + chatRequestMessage.Content.First().Kind.Should().Be(ChatMessageContentPartKind.Image); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -228,13 +227,12 @@ public async Task ItProcessMultiModalMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().BeNullOrEmpty(); - chatRequestMessage.Name.Should().Be("user"); - chatRequestMessage.MultimodalContentItems.Count().Should().Be(2); - chatRequestMessage.MultimodalContentItems.First().Should().BeOfType(); - chatRequestMessage.MultimodalContentItems.Last().Should().BeOfType(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (UserChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.ParticipantName.Should().Be("user"); + chatRequestMessage.Content.Count().Should().Be(2); + chatRequestMessage.Content.First().Kind.Should().Be(ChatMessageContentPartKind.Text); + chatRequestMessage.Content.Last().Kind.Should().Be(ChatMessageContentPartKind.Image); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -276,16 +274,16 @@ public async Task ItProcessToolCallMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Name.Should().Be("assistant"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (AssistantChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.ParticipantName.Should().Be("assistant"); chatRequestMessage.ToolCalls.Count().Should().Be(1); - chatRequestMessage.Content.Should().Be("textContent"); - chatRequestMessage.ToolCalls.First().Should().BeOfType(); - var functionToolCall = (ChatCompletionsFunctionToolCall)chatRequestMessage.ToolCalls.First(); - functionToolCall.Name.Should().Be("test"); + chatRequestMessage.Content.First().Text.Should().Be("textContent"); + chatRequestMessage.ToolCalls.First().Should().BeOfType(); + var functionToolCall = (ChatToolCall)chatRequestMessage.ToolCalls.First(); + functionToolCall.FunctionName.Should().Be("test"); functionToolCall.Id.Should().Be("test"); - functionToolCall.Arguments.Should().Be("test"); + functionToolCall.FunctionArguments.Should().Be("test"); return await 
innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -306,18 +304,18 @@ public async Task ItProcessParallelToolCallMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (AssistantChatMessage)((MessageEnvelope)innerMessage!).Content; chatRequestMessage.Content.Should().BeNullOrEmpty(); - chatRequestMessage.Name.Should().Be("assistant"); + chatRequestMessage.ParticipantName.Should().Be("assistant"); chatRequestMessage.ToolCalls.Count().Should().Be(2); for (int i = 0; i < chatRequestMessage.ToolCalls.Count(); i++) { - chatRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); - var functionToolCall = (ChatCompletionsFunctionToolCall)chatRequestMessage.ToolCalls.ElementAt(i); - functionToolCall.Name.Should().Be("test"); + chatRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); + var functionToolCall = (ChatToolCall)chatRequestMessage.ToolCalls.ElementAt(i); + functionToolCall.FunctionName.Should().Be("test"); functionToolCall.Id.Should().Be($"test_{i}"); - functionToolCall.Arguments.Should().Be("test"); + functionToolCall.FunctionArguments.Should().Be("test"); } return await innerAgent.GenerateReplyAsync(msgs); }) @@ -353,10 +351,11 @@ public async Task ItProcessToolCallResultMessageAsync() .RegisterMiddleware(async (msgs, _, innerAgent, _) => { var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("result"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ToolChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("result"); chatRequestMessage.ToolCallId.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -378,9 +377,9 @@ public async Task ItProcessParallelToolCallResultMessageAsync() for (int i = 0; i < msgs.Count(); i++) { var innerMessage = msgs.ElementAt(i); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("result"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ToolChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("result"); chatRequestMessage.ToolCallId.Should().Be($"test_{i}"); } return await innerAgent.GenerateReplyAsync(msgs); @@ -406,10 +405,10 @@ public async Task ItProcessFunctionCallMiddlewareMessageFromUserAsync() { msgs.Count().Should().Be(1); var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("result"); - chatRequestMessage.Name.Should().Be("user"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (UserChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("result"); + chatRequestMessage.ParticipantName.Should().Be("user"); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -430,21 +429,21 @@ public async Task ItProcessFunctionCallMiddlewareMessageFromAssistantAsync() { 
msgs.Count().Should().Be(2); var innerMessage = msgs.Last(); - innerMessage!.Should().BeOfType>(); - var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; - chatRequestMessage.Content.Should().Be("result"); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ToolChatMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.First().Text.Should().Be("result"); chatRequestMessage.ToolCallId.Should().Be("test"); var toolCallMessage = msgs.First(); - toolCallMessage!.Should().BeOfType>(); - var toolCallRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)toolCallMessage!).Content; + toolCallMessage!.Should().BeOfType>(); + var toolCallRequestMessage = (AssistantChatMessage)((MessageEnvelope)toolCallMessage!).Content; toolCallRequestMessage.Content.Should().BeNullOrEmpty(); toolCallRequestMessage.ToolCalls.Count().Should().Be(1); - toolCallRequestMessage.ToolCalls.First().Should().BeOfType(); - var functionToolCall = (ChatCompletionsFunctionToolCall)toolCallRequestMessage.ToolCalls.First(); - functionToolCall.Name.Should().Be("test"); + toolCallRequestMessage.ToolCalls.First().Should().BeOfType(); + var functionToolCall = (ChatToolCall)toolCallRequestMessage.ToolCalls.First(); + functionToolCall.FunctionName.Should().Be("test"); functionToolCall.Id.Should().Be("test"); - functionToolCall.Arguments.Should().Be("test"); + functionToolCall.FunctionArguments.Should().Be("test"); return await innerAgent.GenerateReplyAsync(msgs); }) .RegisterMiddleware(middleware); @@ -465,26 +464,26 @@ public async Task ItProcessParallelFunctionCallMiddlewareMessageFromAssistantAsy { msgs.Count().Should().Be(3); var toolCallMessage = msgs.First(); - toolCallMessage!.Should().BeOfType>(); - var toolCallRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)toolCallMessage!).Content; + toolCallMessage!.Should().BeOfType>(); + var toolCallRequestMessage = (AssistantChatMessage)((MessageEnvelope)toolCallMessage!).Content; toolCallRequestMessage.Content.Should().BeNullOrEmpty(); toolCallRequestMessage.ToolCalls.Count().Should().Be(2); for (int i = 0; i < toolCallRequestMessage.ToolCalls.Count(); i++) { - toolCallRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); - var functionToolCall = (ChatCompletionsFunctionToolCall)toolCallRequestMessage.ToolCalls.ElementAt(i); - functionToolCall.Name.Should().Be("test"); + toolCallRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); + var functionToolCall = (ChatToolCall)toolCallRequestMessage.ToolCalls.ElementAt(i); + functionToolCall.FunctionName.Should().Be("test"); functionToolCall.Id.Should().Be($"test_{i}"); - functionToolCall.Arguments.Should().Be("test"); + functionToolCall.FunctionArguments.Should().Be("test"); } for (int i = 1; i < msgs.Count(); i++) { var toolCallResultMessage = msgs.ElementAt(i); - toolCallResultMessage!.Should().BeOfType>(); - var toolCallResultRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)toolCallResultMessage!).Content; - toolCallResultRequestMessage.Content.Should().Be("result"); + toolCallResultMessage!.Should().BeOfType>(); + var toolCallResultRequestMessage = (ToolChatMessage)((MessageEnvelope)toolCallResultMessage!).Content; + toolCallResultRequestMessage.Content.First().Text.Should().Be("result"); toolCallResultRequestMessage.ToolCallId.Should().Be($"test_{i - 1}"); } @@ -504,41 +503,6 @@ public async Task ItProcessParallelFunctionCallMiddlewareMessageFromAssistantAsy await agent.GenerateReplyAsync([aggregateMessage]); } - [Fact] 
- public async Task ItConvertChatResponseMessageToTextMessageAsync() - { - var middleware = new OpenAIChatRequestMessageConnector(); - var agent = new EchoAgent("assistant") - .RegisterMiddleware(middleware); - - // text message - var textMessage = CreateInstance(ChatRole.Assistant, "hello"); - var chatRequestMessage = MessageEnvelope.Create(textMessage); - - var message = await agent.GenerateReplyAsync([chatRequestMessage]); - message.Should().BeOfType(); - message.GetContent().Should().Be("hello"); - message.GetRole().Should().Be(Role.Assistant); - } - - [Fact] - public async Task ItConvertChatResponseMessageToToolCallMessageAsync() - { - var middleware = new OpenAIChatRequestMessageConnector(); - var agent = new EchoAgent("assistant") - .RegisterMiddleware(middleware); - - // tool call message - var toolCallMessage = CreateInstance(ChatRole.Assistant, "textContent", new[] { new ChatCompletionsFunctionToolCall("test", "test", "test") }, new FunctionCall("test", "test"), CreateInstance(), new Dictionary()); - var chatRequestMessage = MessageEnvelope.Create(toolCallMessage); - var message = await agent.GenerateReplyAsync([chatRequestMessage]); - message.Should().BeOfType(); - message.GetToolCalls()!.Count().Should().Be(1); - message.GetToolCalls()!.First().FunctionName.Should().Be("test"); - message.GetToolCalls()!.First().FunctionArguments.Should().Be("test"); - message.GetContent().Should().Be("textContent"); - } - [Fact] public async Task ItReturnOriginalMessageWhenStrictModeIsFalseAsync() { @@ -562,7 +526,7 @@ public async Task ItThrowInvalidOperationExceptionWhenStrictModeIsTrueAsync() .RegisterMiddleware(middleware); // text message - var textMessage = new ChatRequestUserMessage("hello"); + var textMessage = new UserChatMessage("hello"); var messageToSend = MessageEnvelope.Create(textMessage); Func action = async () => await agent.GenerateReplyAsync([messageToSend]); @@ -574,22 +538,24 @@ public void ToOpenAIChatRequestMessageShortCircuitTest() { var agent = new EchoAgent("assistant"); var middleware = new OpenAIChatRequestMessageConnector(); - ChatRequestMessage[] messages = +#pragma warning disable CS0618 // Type or member is obsolete + ChatMessage[] messages = [ - new ChatRequestUserMessage("Hello"), - new ChatRequestAssistantMessage("How can I help you?"), - new ChatRequestSystemMessage("You are a helpful AI assistant"), - new ChatRequestFunctionMessage("result", "functionName"), - new ChatRequestToolMessage("test", "test"), + new UserChatMessage("Hello"), + new AssistantChatMessage("How can I help you?"), + new SystemChatMessage("You are a helpful AI assistant"), + new FunctionChatMessage("functionName", "result"), + new ToolChatMessage("test", "test"), ]; +#pragma warning restore CS0618 // Type or member is obsolete foreach (var oaiMessage in messages) { - IMessage message = new MessageEnvelope(oaiMessage); + IMessage message = new MessageEnvelope(oaiMessage); var oaiMessages = middleware.ProcessIncomingMessages(agent, [message]); oaiMessages.Count().Should().Be(1); //oaiMessages.First().Should().BeOfType>(); - if (oaiMessages.First() is IMessage chatRequestMessage) + if (oaiMessages.First() is IMessage chatRequestMessage) { chatRequestMessage.Content.Should().Be(oaiMessage); } @@ -609,27 +575,27 @@ private void VerifyOAIMessages(IEnumerable<(IMessage, IEnumerable)> me foreach (var m in ms) { object? 
obj = null; - var chatRequestMessage = (m as IMessage)?.Content; - if (chatRequestMessage is ChatRequestUserMessage userMessage) + var chatRequestMessage = (m as IMessage)?.Content; + if (chatRequestMessage is UserChatMessage userMessage) { obj = new { - Role = userMessage.Role.ToString(), + Role = "user", Content = userMessage.Content, - Name = userMessage.Name, - MultiModaItem = userMessage.MultimodalContentItems?.Select(item => + Name = userMessage.ParticipantName, + MultiModaItem = userMessage.Content?.Select(item => { return item switch { - ChatMessageImageContentItem imageContentItem => new + _ when item.Kind == ChatMessageContentPartKind.Image => new { Type = "Image", - ImageUrl = GetImageUrlFromContent(imageContentItem), + ImageUrl = GetImageUrlFromContent(item), } as object, - ChatMessageTextContentItem textContentItem => new + _ when item.Kind == ChatMessageContentPartKind.Text => new { Type = "Text", - Text = textContentItem.Text, + Text = item.Text, } as object, _ => throw new System.NotImplementedException(), }; @@ -637,58 +603,60 @@ private void VerifyOAIMessages(IEnumerable<(IMessage, IEnumerable)> me }; } - if (chatRequestMessage is ChatRequestAssistantMessage assistantMessage) + if (chatRequestMessage is AssistantChatMessage assistantMessage) { obj = new { - Role = assistantMessage.Role.ToString(), + Role = "assistant", Content = assistantMessage.Content, - Name = assistantMessage.Name, + Name = assistantMessage.ParticipantName, TooCall = assistantMessage.ToolCalls.Select(tc => { return tc switch { - ChatCompletionsFunctionToolCall functionToolCall => new + ChatToolCall functionToolCall => new { Type = "Function", - Name = functionToolCall.Name, - Arguments = functionToolCall.Arguments, + Name = functionToolCall.FunctionName, + Arguments = functionToolCall.FunctionArguments, Id = functionToolCall.Id, } as object, _ => throw new System.NotImplementedException(), }; }), - FunctionCallName = assistantMessage.FunctionCall?.Name, - FunctionCallArguments = assistantMessage.FunctionCall?.Arguments, + FunctionCallName = assistantMessage.FunctionCall?.FunctionName, + FunctionCallArguments = assistantMessage.FunctionCall?.FunctionArguments, }; } - if (chatRequestMessage is ChatRequestSystemMessage systemMessage) + if (chatRequestMessage is SystemChatMessage systemMessage) { obj = new { - Name = systemMessage.Name, - Role = systemMessage.Role.ToString(), + Name = systemMessage.ParticipantName, + Role = "system", Content = systemMessage.Content, }; } - if (chatRequestMessage is ChatRequestFunctionMessage functionMessage) +#pragma warning disable CS0618 // Type or member is obsolete + if (chatRequestMessage is FunctionChatMessage functionMessage) { obj = new { - Role = functionMessage.Role.ToString(), + Role = "function", Content = functionMessage.Content, - Name = functionMessage.Name, + Name = functionMessage.FunctionName, }; } +#pragma warning restore CS0618 // Type or member is obsolete - if (chatRequestMessage is ChatRequestToolMessage toolCallMessage) + if (chatRequestMessage is ToolChatMessage toolCallMessage) { obj = new { - Role = toolCallMessage.Role.ToString(), - Content = toolCallMessage.Content, + Role = "tool", + Content = toolCallMessage.Content.First().Text, ToolCallId = toolCallMessage.ToolCallId, }; } @@ -707,9 +675,9 @@ private void VerifyOAIMessages(IEnumerable<(IMessage, IEnumerable)> me Approvals.Verify(json); } - private object? GetImageUrlFromContent(ChatMessageImageContentItem content) + private object? 
GetImageUrlFromContent(ChatMessageContentPart content) { - return content.GetType().GetProperty("ImageUrl", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)?.GetValue(content); + return content.ImageUri; } private static T CreateInstance(params object[] args) diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt b/dotnet/test/AutoGen.OpenAI.V1.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt new file mode 100644 index 00000000000..e8e9af84dbd --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/ApprovalTests/OpenAIMessageTests.BasicMessageTest.approved.txt @@ -0,0 +1,174 @@ +[ + { + "OriginalMessage": "TextMessage(system, You are a helpful AI assistant, )", + "ConvertedMessages": [ + { + "Name": null, + "Role": "system", + "Content": "You are a helpful AI assistant" + } + ] + }, + { + "OriginalMessage": "TextMessage(user, Hello, user)", + "ConvertedMessages": [ + { + "Role": "user", + "Content": "Hello", + "Name": "user", + "MultiModaItem": null + } + ] + }, + { + "OriginalMessage": "TextMessage(assistant, How can I help you?, assistant)", + "ConvertedMessages": [ + { + "Role": "assistant", + "Content": "How can I help you?", + "Name": "assistant", + "TooCall": [], + "FunctionCallName": null, + "FunctionCallArguments": null + } + ] + }, + { + "OriginalMessage": "ImageMessage(user, https://example.com/image.png, user)", + "ConvertedMessages": [ + { + "Role": "user", + "Content": null, + "Name": "user", + "MultiModaItem": [ + { + "Type": "Image", + "ImageUrl": { + "Url": "https://example.com/image.png", + "Detail": null + } + } + ] + } + ] + }, + { + "OriginalMessage": "MultiModalMessage(assistant, user)\n\tTextMessage(user, Hello, user)\n\tImageMessage(user, https://example.com/image.png, user)", + "ConvertedMessages": [ + { + "Role": "user", + "Content": null, + "Name": "user", + "MultiModaItem": [ + { + "Type": "Text", + "Text": "Hello" + }, + { + "Type": "Image", + "ImageUrl": { + "Url": "https://example.com/image.png", + "Detail": null + } + } + ] + } + ] + }, + { + "OriginalMessage": "ToolCallMessage(assistant)\n\tToolCall(test, test, )", + "ConvertedMessages": [ + { + "Role": "assistant", + "Content": "", + "Name": "assistant", + "TooCall": [ + { + "Type": "Function", + "Name": "test", + "Arguments": "test", + "Id": "test" + } + ], + "FunctionCallName": null, + "FunctionCallArguments": null + } + ] + }, + { + "OriginalMessage": "ToolCallResultMessage(user)\n\tToolCall(test, test, result)", + "ConvertedMessages": [ + { + "Role": "tool", + "Content": "result", + "ToolCallId": "test" + } + ] + }, + { + "OriginalMessage": "ToolCallResultMessage(user)\n\tToolCall(result, test, test)\n\tToolCall(result, test, test)", + "ConvertedMessages": [ + { + "Role": "tool", + "Content": "test", + "ToolCallId": "result_0" + }, + { + "Role": "tool", + "Content": "test", + "ToolCallId": "result_1" + } + ] + }, + { + "OriginalMessage": "ToolCallMessage(assistant)\n\tToolCall(test, test, )\n\tToolCall(test, test, )", + "ConvertedMessages": [ + { + "Role": "assistant", + "Content": "", + "Name": "assistant", + "TooCall": [ + { + "Type": "Function", + "Name": "test", + "Arguments": "test", + "Id": "test_0" + }, + { + "Type": "Function", + "Name": "test", + "Arguments": "test", + "Id": "test_1" + } + ], + "FunctionCallName": null, + "FunctionCallArguments": null + } + ] + }, + { + "OriginalMessage": "AggregateMessage(assistant)\n\tToolCallMessage(assistant)\n\tToolCall(test, test, 
)\n\tToolCallResultMessage(assistant)\n\tToolCall(test, test, result)", + "ConvertedMessages": [ + { + "Role": "assistant", + "Content": "", + "Name": "assistant", + "TooCall": [ + { + "Type": "Function", + "Name": "test", + "Arguments": "test", + "Id": "test" + } + ], + "FunctionCallName": null, + "FunctionCallArguments": null + }, + { + "Role": "tool", + "Content": "result", + "ToolCallId": "test" + } + ] + } +] \ No newline at end of file diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/AutoGen.OpenAI.V1.Tests.csproj b/dotnet/test/AutoGen.OpenAI.V1.Tests/AutoGen.OpenAI.V1.Tests.csproj new file mode 100644 index 00000000000..0be8c520033 --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/AutoGen.OpenAI.V1.Tests.csproj @@ -0,0 +1,24 @@ + + + + $(TestTargetFrameworks) + false + True + True + + + + + + + + + + + $([System.String]::Copy('%(FileName)').Split('.')[0]) + $(ProjectExt.Replace('proj', '')) + %(ParentFile)%(ParentExtension) + + + + diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/GPTAgentTest.cs b/dotnet/test/AutoGen.OpenAI.V1.Tests/GPTAgentTest.cs new file mode 100644 index 00000000000..b8944d45d76 --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/GPTAgentTest.cs @@ -0,0 +1,270 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GPTAgentTest.cs + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using AutoGen.OpenAI.V1.Extension; +using AutoGen.Tests; +using Azure.AI.OpenAI; +using FluentAssertions; +using Xunit.Abstractions; + +namespace AutoGen.OpenAI.V1.Tests; + +public partial class GPTAgentTest +{ + private ITestOutputHelper _output; + public GPTAgentTest(ITestOutputHelper output) + { + _output = output; + } + + private ILLMConfig CreateAzureOpenAIGPT35TurboConfig() + { + var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new ArgumentException("AZURE_OPENAI_API_KEY is not set"); + var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new ArgumentException("AZURE_OPENAI_ENDPOINT is not set"); + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new ArgumentException("AZURE_OPENAI_DEPLOY_NAME is not set"); + return new AzureOpenAIConfig(endpoint, deployName, key); + } + + private ILLMConfig CreateOpenAIGPT4VisionConfig() + { + var key = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? 
throw new ArgumentException("OPENAI_API_KEY is not set"); + return new OpenAIConfig(key, "gpt-4o-mini"); + } + + [Obsolete] + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task GPTAgentTestAsync() + { + var config = this.CreateAzureOpenAIGPT35TurboConfig(); + + var agent = new GPTAgent("gpt", "You are a helpful AI assistant", config); + + await UpperCaseTestAsync(agent); + await UpperCaseStreamingTestAsync(agent); + } + + [Obsolete] + [ApiKeyFact("OPENAI_API_KEY", "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT")] + public async Task GPTAgentVisionTestAsync() + { + var visionConfig = this.CreateOpenAIGPT4VisionConfig(); + var visionAgent = new GPTAgent( + name: "gpt", + systemMessage: "You are a helpful AI assistant", + config: visionConfig, + temperature: 0); + + var gpt3Config = this.CreateAzureOpenAIGPT35TurboConfig(); + var gpt3Agent = new GPTAgent( + name: "gpt3", + systemMessage: "You are a helpful AI assistant, return highest label from conversation", + config: gpt3Config, + temperature: 0, + functions: new[] { this.GetHighestLabelFunctionContract.ToOpenAIFunctionDefinition() }, + functionMap: new Dictionary>> + { + { nameof(GetHighestLabel), this.GetHighestLabelWrapper }, + }); + + var imageUri = new Uri(@"https://microsoft.github.io/autogen/assets/images/level2algebra-659ba95286432d9945fc89e84d606797.png"); + var oaiMessage = new ChatRequestUserMessage( + new ChatMessageTextContentItem("which label has the highest inference cost"), + new ChatMessageImageContentItem(imageUri)); + var multiModalMessage = new MultiModalMessage(Role.User, + [ + new TextMessage(Role.User, "which label has the highest inference cost", from: "user"), + new ImageMessage(Role.User, imageUri, from: "user"), + ], + from: "user"); + + var imageMessage = new ImageMessage(Role.User, imageUri, from: "user"); + + string imagePath = Path.Combine("testData", "images", "square.png"); + ImageMessage imageMessageData; + using (var fs = new FileStream(imagePath, FileMode.Open, FileAccess.Read)) + { + var ms = new MemoryStream(); + await fs.CopyToAsync(ms); + ms.Seek(0, SeekOrigin.Begin); + var imageData = await BinaryData.FromStreamAsync(ms, "image/png"); + imageMessageData = new ImageMessage(Role.Assistant, imageData, from: "user"); + } + + IMessage[] messages = [ + MessageEnvelope.Create(oaiMessage), + multiModalMessage, + imageMessage, + imageMessageData + ]; + + foreach (var message in messages) + { + var response = await visionAgent.SendAsync(message); + response.From.Should().Be(visionAgent.Name); + + var labelResponse = await gpt3Agent.SendAsync(response); + labelResponse.From.Should().Be(gpt3Agent.Name); + labelResponse.GetToolCalls()!.First().FunctionName.Should().Be(nameof(GetHighestLabel)); + } + } + + [Obsolete] + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task GPTFunctionCallAgentTestAsync() + { + var config = this.CreateAzureOpenAIGPT35TurboConfig(); + var agentWithFunction = new GPTAgent("gpt", "You are a helpful AI assistant", config, 0, functions: new[] { this.EchoAsyncFunctionContract.ToOpenAIFunctionDefinition() }); + + await EchoFunctionCallTestAsync(agentWithFunction); + } + + [Obsolete] + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task GPTAgentFunctionCallSelfExecutionTestAsync() + { + var config = this.CreateAzureOpenAIGPT35TurboConfig(); + var agent = new GPTAgent( + name: "gpt", + systemMessage: "You are a helpful 
AI assistant", + config: config, + temperature: 0, + functions: new[] { this.EchoAsyncFunctionContract.ToOpenAIFunctionDefinition() }, + functionMap: new Dictionary>> + { + { nameof(EchoAsync), this.EchoAsyncWrapper }, + }); + + await EchoFunctionCallExecutionStreamingTestAsync(agent); + await EchoFunctionCallExecutionTestAsync(agent); + } + + /// + /// echo when asked. + /// + /// message to echo + [FunctionAttribute] + public async Task EchoAsync(string message) + { + return $"[ECHO] {message}"; + } + + /// + /// return the label name with hightest inference cost + /// + /// + /// + [FunctionAttribute] + public async Task GetHighestLabel(string labelName, string color) + { + return $"[HIGHEST_LABEL] {labelName} {color}"; + } + + private async Task EchoFunctionCallTestAsync(IAgent agent) + { + //var message = new TextMessage(Role.System, "You are a helpful AI assistant that call echo function"); + var helloWorld = new TextMessage(Role.User, "echo Hello world"); + + var reply = await agent.SendAsync(chatHistory: new[] { helloWorld }); + + reply.From.Should().Be(agent.Name); + reply.GetToolCalls()!.First().FunctionName.Should().Be(nameof(EchoAsync)); + } + + private async Task EchoFunctionCallExecutionTestAsync(IAgent agent) + { + //var message = new TextMessage(Role.System, "You are a helpful AI assistant that echo whatever user says"); + var helloWorld = new TextMessage(Role.User, "echo Hello world"); + + var reply = await agent.SendAsync(chatHistory: new[] { helloWorld }); + + reply.GetContent().Should().Be("[ECHO] Hello world"); + reply.From.Should().Be(agent.Name); + reply.Should().BeOfType(); + } + + private async Task EchoFunctionCallExecutionStreamingTestAsync(IStreamingAgent agent) + { + //var message = new TextMessage(Role.System, "You are a helpful AI assistant that echo whatever user says"); + var helloWorld = new TextMessage(Role.User, "echo Hello world"); + var option = new GenerateReplyOptions + { + Temperature = 0, + }; + var replyStream = agent.GenerateStreamingReplyAsync(messages: new[] { helloWorld }, option); + var answer = "[ECHO] Hello world"; + IMessage? finalReply = default; + await foreach (var reply in replyStream) + { + reply.From.Should().Be(agent.Name); + finalReply = reply; + } + + if (finalReply is ToolCallAggregateMessage aggregateMessage) + { + var toolCallResultMessage = aggregateMessage.Message2; + toolCallResultMessage.ToolCalls.First().Result.Should().Be(answer); + toolCallResultMessage.From.Should().Be(agent.Name); + toolCallResultMessage.ToolCalls.First().FunctionName.Should().Be(nameof(EchoAsync)); + } + else + { + throw new Exception("unexpected message type"); + } + } + + private async Task UpperCaseTestAsync(IAgent agent) + { + var message = new TextMessage(Role.User, "Please convert abcde to upper case."); + + var reply = await agent.SendAsync(chatHistory: new[] { message }); + + reply.GetContent().Should().Contain("ABCDE"); + reply.From.Should().Be(agent.Name); + } + + private async Task UpperCaseStreamingTestAsync(IStreamingAgent agent) + { + var message = new TextMessage(Role.User, "Please convert 'hello world' to upper case"); + var option = new GenerateReplyOptions + { + Temperature = 0, + }; + var replyStream = agent.GenerateStreamingReplyAsync(messages: new[] { message }, option); + var answer = "HELLO WORLD"; + TextMessage? 
finalReply = default; + await foreach (var reply in replyStream) + { + if (reply is TextMessageUpdate update) + { + update.From.Should().Be(agent.Name); + + if (finalReply is null) + { + finalReply = new TextMessage(update); + } + else + { + finalReply.Update(update); + } + + continue; + } + else if (reply is TextMessage textMessage) + { + finalReply = textMessage; + continue; + } + + throw new Exception("unexpected message type"); + } + + finalReply!.Content.Should().Contain(answer); + finalReply!.Role.Should().Be(Role.Assistant); + finalReply!.From.Should().Be(agent.Name); + } +} diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/GlobalUsing.cs b/dotnet/test/AutoGen.OpenAI.V1.Tests/GlobalUsing.cs new file mode 100644 index 00000000000..d66bf001ed5 --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/GlobalUsing.cs @@ -0,0 +1,4 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GlobalUsing.cs + +global using AutoGen.Core; diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/MathClassTest.cs b/dotnet/test/AutoGen.OpenAI.V1.Tests/MathClassTest.cs new file mode 100644 index 00000000000..a1f9541f467 --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/MathClassTest.cs @@ -0,0 +1,222 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// MathClassTest.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using AutoGen.OpenAI.V1.Extension; +using AutoGen.Tests; +using Azure.AI.OpenAI; +using FluentAssertions; +using Xunit.Abstractions; + +namespace AutoGen.OpenAI.V1.Tests +{ + public partial class MathClassTest + { + private readonly ITestOutputHelper _output; + + // as of 2024-05-20, aoai return 500 error when round > 1 + // I'm pretty sure that round > 5 was supported before + // So this is probably some wield regression on aoai side + // I'll keep this test case here for now, plus setting round to 1 + // so the test can still pass. + // In the future, we should rewind this test case to round > 1 (previously was 5) + private int round = 1; + public MathClassTest(ITestOutputHelper output) + { + _output = output; + } + + private Task Print(IEnumerable messages, GenerateReplyOptions? 
option, IAgent agent, CancellationToken ct) + { + try + { + var reply = agent.GenerateReplyAsync(messages, option, ct).Result; + + _output.WriteLine(reply.FormatMessage()); + return Task.FromResult(reply); + } + catch (Exception) + { + _output.WriteLine("Request failed"); + _output.WriteLine($"agent name: {agent.Name}"); + foreach (var message in messages) + { + _output.WriteLine(message.FormatMessage()); + } + + throw; + } + + } + + [FunctionAttribute] + public async Task CreateMathQuestion(string question, int question_index) + { + return $@"[MATH_QUESTION] +Question {question_index}: +{question} + +Student, please answer"; + } + + [FunctionAttribute] + public async Task AnswerQuestion(string answer) + { + return $@"[MATH_ANSWER] +The answer is {answer} +teacher please check answer"; + } + + [FunctionAttribute] + public async Task AnswerIsCorrect(string message) + { + return $@"[ANSWER_IS_CORRECT] +{message} +please update progress"; + } + + [FunctionAttribute] + public async Task UpdateProgress(int correctAnswerCount) + { + if (correctAnswerCount >= this.round) + { + return $@"[UPDATE_PROGRESS] +{GroupChatExtension.TERMINATE}"; + } + else + { + return $@"[UPDATE_PROGRESS] +the number of resolved question is {correctAnswerCount} +teacher, please create the next math question"; + } + } + + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task OpenAIAgentMathChatTestAsync() + { + var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new ArgumentException("AZURE_OPENAI_API_KEY is not set"); + var endPoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new ArgumentException("AZURE_OPENAI_ENDPOINT is not set"); + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new ArgumentException("AZURE_OPENAI_DEPLOY_NAME is not set"); + var openaiClient = new OpenAIClient(new Uri(endPoint), new Azure.AzureKeyCredential(key)); + var teacher = await CreateTeacherAgentAsync(openaiClient, deployName); + var student = await CreateStudentAssistantAgentAsync(openaiClient, deployName); + + var adminFunctionMiddleware = new FunctionCallMiddleware( + functions: [this.UpdateProgressFunctionContract], + functionMap: new Dictionary>> + { + { this.UpdateProgressFunctionContract.Name, this.UpdateProgressWrapper }, + }); + var admin = new OpenAIChatAgent( + openAIClient: openaiClient, + modelName: deployName, + name: "Admin", + systemMessage: $@"You are admin. You update progress after each question is answered.") + .RegisterMessageConnector() + .RegisterStreamingMiddleware(adminFunctionMiddleware) + .RegisterMiddleware(Print); + + var groupAdmin = new OpenAIChatAgent( + openAIClient: openaiClient, + modelName: deployName, + name: "GroupAdmin", + systemMessage: "You are group admin. 
You manage the group chat.") + .RegisterMessageConnector() + .RegisterMiddleware(Print); + await RunMathChatAsync(teacher, student, admin, groupAdmin); + } + + private async Task CreateTeacherAgentAsync(OpenAIClient client, string model) + { + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.CreateMathQuestionFunctionContract, this.AnswerIsCorrectFunctionContract], + functionMap: new Dictionary>> + { + { this.CreateMathQuestionFunctionContract.Name!, this.CreateMathQuestionWrapper }, + { this.AnswerIsCorrectFunctionContract.Name!, this.AnswerIsCorrectWrapper }, + }); + + var teacher = new OpenAIChatAgent( + openAIClient: client, + name: "Teacher", + systemMessage: @"You are a preschool math teacher. +You create math question and ask student to answer it. +Then you check if the answer is correct. +If the answer is wrong, you ask student to fix it", + modelName: model) + .RegisterMessageConnector() + .RegisterStreamingMiddleware(functionCallMiddleware) + .RegisterMiddleware(Print); + + return teacher; + } + + private async Task CreateStudentAssistantAgentAsync(OpenAIClient client, string model) + { + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.AnswerQuestionFunctionContract], + functionMap: new Dictionary>> + { + { this.AnswerQuestionFunctionContract.Name!, this.AnswerQuestionWrapper }, + }); + var student = new OpenAIChatAgent( + openAIClient: client, + name: "Student", + modelName: model, + systemMessage: @"You are a student. You answer math question from teacher.") + .RegisterMessageConnector() + .RegisterStreamingMiddleware(functionCallMiddleware) + .RegisterMiddleware(Print); + + return student; + } + + private async Task RunMathChatAsync(IAgent teacher, IAgent student, IAgent admin, IAgent groupAdmin) + { + var teacher2Student = Transition.Create(teacher, student); + var student2Teacher = Transition.Create(student, teacher); + var teacher2Admin = Transition.Create(teacher, admin); + var admin2Teacher = Transition.Create(admin, teacher); + var workflow = new Graph( + [ + teacher2Student, + student2Teacher, + teacher2Admin, + admin2Teacher, + ]); + var group = new GroupChat( + workflow: workflow, + members: [ + admin, + teacher, + student, + ], + admin: groupAdmin); + + var groupChatManager = new GroupChatManager(group); + var chatHistory = await admin.InitiateChatAsync(groupChatManager, "teacher, create question", maxRound: 50); + + chatHistory.Where(msg => msg.From == teacher.Name && msg.GetContent()?.Contains("[MATH_QUESTION]") is true) + .Count() + .Should().BeGreaterThanOrEqualTo(this.round); + + chatHistory.Where(msg => msg.From == student.Name && msg.GetContent()?.Contains("[MATH_ANSWER]") is true) + .Count() + .Should().BeGreaterThanOrEqualTo(this.round); + + chatHistory.Where(msg => msg.From == teacher.Name && msg.GetContent()?.Contains("[ANSWER_IS_CORRECT]") is true) + .Count() + .Should().BeGreaterThanOrEqualTo(this.round); + + // check if there's terminate chat message from admin + chatHistory.Where(msg => msg.From == admin.Name && msg.IsGroupChatTerminateMessage()) + .Count() + .Should().Be(1); + } + } +} diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/OpenAIChatAgentTest.cs b/dotnet/test/AutoGen.OpenAI.V1.Tests/OpenAIChatAgentTest.cs new file mode 100644 index 00000000000..0957cc9f49b --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/OpenAIChatAgentTest.cs @@ -0,0 +1,279 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// OpenAIChatAgentTest.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using AutoGen.OpenAI.V1.Extension; +using AutoGen.Tests; +using Azure.AI.OpenAI; +using FluentAssertions; + +namespace AutoGen.OpenAI.V1.Tests; + +public partial class OpenAIChatAgentTest +{ + /// + /// Get the weather for a location. + /// + /// location + /// + [Function] + public async Task GetWeatherAsync(string location) + { + return $"The weather in {location} is sunny."; + } + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task BasicConversationTestAsync() + { + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); + var openaiClient = CreateOpenAIClientFromAzureOpenAI(); + var openAIChatAgent = new OpenAIChatAgent( + openAIClient: openaiClient, + name: "assistant", + modelName: deployName); + + // By default, OpenAIChatClient supports the following message types + // - IMessage + var chatMessageContent = MessageEnvelope.Create(new ChatRequestUserMessage("Hello")); + var reply = await openAIChatAgent.SendAsync(chatMessageContent); + + reply.Should().BeOfType>(); + reply.As>().From.Should().Be("assistant"); + reply.As>().Content.Choices.First().Message.Role.Should().Be(ChatRole.Assistant); + reply.As>().Content.Usage.TotalTokens.Should().BeGreaterThan(0); + + // test streaming + var streamingReply = openAIChatAgent.GenerateStreamingReplyAsync(new[] { chatMessageContent }); + + await foreach (var streamingMessage in streamingReply) + { + streamingMessage.Should().BeOfType>(); + streamingMessage.As>().From.Should().Be("assistant"); + } + } + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task OpenAIChatMessageContentConnectorTestAsync() + { + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); + var openaiClient = CreateOpenAIClientFromAzureOpenAI(); + var openAIChatAgent = new OpenAIChatAgent( + openAIClient: openaiClient, + name: "assistant", + modelName: deployName); + + MiddlewareStreamingAgent assistant = openAIChatAgent + .RegisterMessageConnector(); + + var messages = new IMessage[] + { + MessageEnvelope.Create(new ChatRequestUserMessage("Hello")), + new TextMessage(Role.Assistant, "Hello", from: "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.Assistant, "Hello", from: "user"), + ], + from: "user"), + }; + + foreach (var message in messages) + { + var reply = await assistant.SendAsync(message); + + reply.Should().BeOfType(); + reply.As().From.Should().Be("assistant"); + } + + // test streaming + foreach (var message in messages) + { + var reply = assistant.GenerateStreamingReplyAsync([message]); + + await foreach (var streamingMessage in reply) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); + } + } + } + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task OpenAIChatAgentToolCallTestAsync() + { + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); + var openaiClient = CreateOpenAIClientFromAzureOpenAI(); + var openAIChatAgent = new OpenAIChatAgent( + openAIClient: openaiClient, + name: "assistant", + modelName: deployName); + + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.GetWeatherAsyncFunctionContract]); + MiddlewareStreamingAgent assistant = openAIChatAgent + .RegisterMessageConnector(); + + assistant.StreamingMiddlewares.Count().Should().Be(1); + var functionCallAgent = assistant + .RegisterStreamingMiddleware(functionCallMiddleware); + + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + MessageEnvelope.Create(new ChatRequestUserMessage(question)), + new TextMessage(Role.Assistant, question, from: "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.Assistant, question, from: "user"), + ], + from: "user"), + }; + + foreach (var message in messages) + { + var reply = await functionCallAgent.SendAsync(message); + + reply.Should().BeOfType(); + reply.As().From.Should().Be("assistant"); + reply.As().ToolCalls.Count().Should().Be(1); + reply.As().ToolCalls.First().FunctionName.Should().Be(this.GetWeatherAsyncFunctionContract.Name); + } + + // test streaming + foreach (var message in messages) + { + var reply = functionCallAgent.GenerateStreamingReplyAsync([message]); + ToolCallMessage? toolCallMessage = null; + await foreach (var streamingMessage in reply) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); + if (toolCallMessage is null) + { + toolCallMessage = new ToolCallMessage(streamingMessage.As()); + } + else + { + toolCallMessage.Update(streamingMessage.As()); + } + } + + toolCallMessage.Should().NotBeNull(); + toolCallMessage!.From.Should().Be("assistant"); + toolCallMessage.ToolCalls.Count().Should().Be(1); + toolCallMessage.ToolCalls.First().FunctionName.Should().Be(this.GetWeatherAsyncFunctionContract.Name); + } + } + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task OpenAIChatAgentToolCallInvokingTestAsync() + { + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); + var openaiClient = CreateOpenAIClientFromAzureOpenAI(); + var openAIChatAgent = new OpenAIChatAgent( + openAIClient: openaiClient, + name: "assistant", + modelName: deployName); + + var functionCallMiddleware = new FunctionCallMiddleware( + functions: [this.GetWeatherAsyncFunctionContract], + functionMap: new Dictionary>> { { this.GetWeatherAsyncFunctionContract.Name!, this.GetWeatherAsyncWrapper } }); + MiddlewareStreamingAgent assistant = openAIChatAgent + .RegisterMessageConnector(); + + var functionCallAgent = assistant + .RegisterStreamingMiddleware(functionCallMiddleware); + + var question = "What's the weather in Seattle"; + var messages = new IMessage[] + { + MessageEnvelope.Create(new ChatRequestUserMessage(question)), + new TextMessage(Role.Assistant, question, from: "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.Assistant, question, from: "user"), + ], + from: "user"), + }; + + foreach (var message in messages) + { + var reply = await functionCallAgent.SendAsync(message); + + reply.Should().BeOfType(); + reply.From.Should().Be("assistant"); + reply.GetToolCalls()!.Count().Should().Be(1); + reply.GetToolCalls()!.First().FunctionName.Should().Be(this.GetWeatherAsyncFunctionContract.Name); + reply.GetContent()!.ToLower().Should().Contain("seattle"); + } + + // test streaming + foreach (var message in messages) + { + var reply = functionCallAgent.GenerateStreamingReplyAsync([message]); + await foreach (var streamingMessage in reply) + { + if (streamingMessage is not IMessage) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be("assistant"); + } + else + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().GetContent()!.ToLower().Should().Contain("seattle"); + } + } + } + } + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task ItCreateOpenAIChatAgentWithChatCompletionOptionAsync() + { + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); + var openaiClient = CreateOpenAIClientFromAzureOpenAI(); + var options = new ChatCompletionsOptions(deployName, []) + { + Temperature = 0.7f, + MaxTokens = 1, + }; + + var openAIChatAgent = new OpenAIChatAgent( + openAIClient: openaiClient, + name: "assistant", + options: options) + .RegisterMessageConnector(); + + var respond = await openAIChatAgent.SendAsync("hello"); + respond.GetContent()?.Should().NotBeNullOrEmpty(); + } + + [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + public async Task ItThrowExceptionWhenChatCompletionOptionContainsMessages() + { + var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); + var openaiClient = CreateOpenAIClientFromAzureOpenAI(); + var options = new ChatCompletionsOptions(deployName, [new ChatRequestUserMessage("hi")]) + { + Temperature = 0.7f, + MaxTokens = 1, + }; + + var action = () => new OpenAIChatAgent( + openAIClient: openaiClient, + name: "assistant", + options: options) + .RegisterMessageConnector(); + + action.Should().ThrowExactly().WithMessage("Messages should not be provided in options"); + } + + private OpenAIClient CreateOpenAIClientFromAzureOpenAI() + { + var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); + var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); + return new OpenAIClient(new Uri(endpoint), new Azure.AzureKeyCredential(key)); + } +} diff --git a/dotnet/test/AutoGen.OpenAI.V1.Tests/OpenAIMessageTests.cs b/dotnet/test/AutoGen.OpenAI.V1.Tests/OpenAIMessageTests.cs new file mode 100644 index 00000000000..3050c4e8e09 --- /dev/null +++ b/dotnet/test/AutoGen.OpenAI.V1.Tests/OpenAIMessageTests.cs @@ -0,0 +1,724 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// OpenAIMessageTests.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text.Json; +using System.Threading.Tasks; +using ApprovalTests; +using ApprovalTests.Namers; +using ApprovalTests.Reporters; +using AutoGen.Tests; +using Azure.AI.OpenAI; +using FluentAssertions; +using Xunit; + +namespace AutoGen.OpenAI.V1.Tests; + +public class OpenAIMessageTests +{ + private readonly JsonSerializerOptions jsonSerializerOptions = new JsonSerializerOptions + { + WriteIndented = true, + IgnoreReadOnlyProperties = false, + }; + + [Fact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("ApprovalTests")] + public void BasicMessageTest() + { + IMessage[] messages = [ + new TextMessage(Role.System, "You are a helpful AI assistant"), + new TextMessage(Role.User, "Hello", "user"), + new TextMessage(Role.Assistant, "How can I help you?", from: "assistant"), + new ImageMessage(Role.User, "https://example.com/image.png", "user"), + new MultiModalMessage(Role.Assistant, + [ + new TextMessage(Role.User, "Hello", "user"), + new ImageMessage(Role.User, "https://example.com/image.png", "user"), + ], "user"), + new ToolCallMessage("test", "test", "assistant"), + new ToolCallResultMessage("result", "test", "test", "user"), + new ToolCallResultMessage( + [ + new ToolCall("result", "test", "test"), + new ToolCall("result", "test", "test"), + ], "user"), + new ToolCallMessage( + [ + new ToolCall("test", "test"), + new ToolCall("test", "test"), + ], "assistant"), + new AggregateMessage( + message1: new ToolCallMessage("test", "test", "assistant"), + message2: new ToolCallResultMessage("result", "test", "test", "assistant"), "assistant"), + ]; + var openaiMessageConnectorMiddleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant"); + + var oaiMessages = messages.Select(m => (m, openaiMessageConnectorMiddleware.ProcessIncomingMessages(agent, [m]))); + VerifyOAIMessages(oaiMessages); + } + + [Fact] + public async Task ItProcessUserTextMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + 
{ + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("Hello"); + chatRequestMessage.Name.Should().Be("user"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new TextMessage(Role.User, "Hello", "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItShortcutChatRequestMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("hello"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var userMessage = new ChatRequestUserMessage("hello"); + var chatRequestMessage = MessageEnvelope.Create(userMessage); + await agent.GenerateReplyAsync([chatRequestMessage]); + } + + [Fact] + public async Task ItShortcutMessageWhenStrictModelIsFalseAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + + var chatRequestMessage = ((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Should().Be("hello"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var userMessage = "hello"; + var chatRequestMessage = MessageEnvelope.Create(userMessage); + await agent.GenerateReplyAsync([chatRequestMessage]); + } + + [Fact] + public async Task ItThrowExceptionWhenStrictModeIsTrueAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // user message + var userMessage = "hello"; + var chatRequestMessage = MessageEnvelope.Create(userMessage); + Func action = async () => await agent.GenerateReplyAsync([chatRequestMessage]); + + await action.Should().ThrowAsync().WithMessage("Invalid message type: MessageEnvelope`1"); + } + + [Fact] + public async Task ItProcessAssistantTextMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("How can I help you?"); + chatRequestMessage.Name.Should().Be("assistant"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // assistant message + IMessage message = new TextMessage(Role.Assistant, "How can I help you?", "assistant"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessSystemTextMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = 
(ChatRequestSystemMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("You are a helpful AI assistant"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // system message + IMessage message = new TextMessage(Role.System, "You are a helpful AI assistant"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessImageMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().BeNullOrEmpty(); + chatRequestMessage.Name.Should().Be("user"); + chatRequestMessage.MultimodalContentItems.Count().Should().Be(1); + chatRequestMessage.MultimodalContentItems.First().Should().BeOfType(); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new ImageMessage(Role.User, "https://example.com/image.png", "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItThrowExceptionWhenProcessingImageMessageFromSelfAndStrictModeIsTrueAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + var imageMessage = new ImageMessage(Role.Assistant, "https://example.com/image.png", "assistant"); + Func action = async () => await agent.GenerateReplyAsync([imageMessage]); + + await action.Should().ThrowAsync().WithMessage("Invalid message type: ImageMessage"); + } + + [Fact] + public async Task ItProcessMultiModalMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().BeNullOrEmpty(); + chatRequestMessage.Name.Should().Be("user"); + chatRequestMessage.MultimodalContentItems.Count().Should().Be(2); + chatRequestMessage.MultimodalContentItems.First().Should().BeOfType(); + chatRequestMessage.MultimodalContentItems.Last().Should().BeOfType(); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new MultiModalMessage( + Role.User, + [ + new TextMessage(Role.User, "Hello", "user"), + new ImageMessage(Role.User, "https://example.com/image.png", "user"), + ], "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItThrowExceptionWhenProcessingMultiModalMessageFromSelfAndStrictModeIsTrueAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + var multiModalMessage = new MultiModalMessage( + Role.Assistant, + [ + new TextMessage(Role.User, "Hello", "assistant"), + new ImageMessage(Role.User, "https://example.com/image.png", "assistant"), + ], "assistant"); + + Func action = async () => await agent.GenerateReplyAsync([multiModalMessage]); + + await action.Should().ThrowAsync().WithMessage("Invalid message type: MultiModalMessage"); + } + + [Fact] + public async 
Task ItProcessToolCallMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Name.Should().Be("assistant"); + chatRequestMessage.ToolCalls.Count().Should().Be(1); + chatRequestMessage.Content.Should().Be("textContent"); + chatRequestMessage.ToolCalls.First().Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)chatRequestMessage.ToolCalls.First(); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be("test"); + functionToolCall.Arguments.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new ToolCallMessage("test", "test", "assistant") + { + Content = "textContent", + }; + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessParallelToolCallMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().BeNullOrEmpty(); + chatRequestMessage.Name.Should().Be("assistant"); + chatRequestMessage.ToolCalls.Count().Should().Be(2); + for (int i = 0; i < chatRequestMessage.ToolCalls.Count(); i++) + { + chatRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)chatRequestMessage.ToolCalls.ElementAt(i); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be($"test_{i}"); + functionToolCall.Arguments.Should().Be("test"); + } + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCalls = new[] + { + new ToolCall("test", "test"), + new ToolCall("test", "test"), + }; + IMessage message = new ToolCallMessage(toolCalls, "assistant"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItThrowExceptionWhenProcessingToolCallMessageFromUserAndStrictModeIsTrueAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(strictMode: true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + var toolCallMessage = new ToolCallMessage("test", "test", "user"); + Func action = async () => await agent.GenerateReplyAsync([toolCallMessage]); + await action.Should().ThrowAsync().WithMessage("Invalid message type: ToolCallMessage"); + } + + [Fact] + public async Task ItProcessToolCallResultMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.ToolCallId.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + IMessage message = new 
ToolCallResultMessage("result", "test", "test", "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessParallelToolCallResultMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(2); + + for (int i = 0; i < msgs.Count(); i++) + { + var innerMessage = msgs.ElementAt(i); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.ToolCallId.Should().Be($"test_{i}"); + } + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCalls = new[] + { + new ToolCall("test", "test", "result"), + new ToolCall("test", "test", "result"), + }; + IMessage message = new ToolCallResultMessage(toolCalls, "user"); + await agent.GenerateReplyAsync([message]); + } + + [Fact] + public async Task ItProcessFunctionCallMiddlewareMessageFromUserAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestUserMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.Name.Should().Be("user"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCallMessage = new ToolCallMessage("test", "test", "user"); + var toolCallResultMessage = new ToolCallResultMessage("result", "test", "test", "user"); + var aggregateMessage = new AggregateMessage(toolCallMessage, toolCallResultMessage, "user"); + await agent.GenerateReplyAsync([aggregateMessage]); + } + + [Fact] + public async Task ItProcessFunctionCallMiddlewareMessageFromAssistantAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(2); + var innerMessage = msgs.Last(); + innerMessage!.Should().BeOfType>(); + var chatRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)innerMessage!).Content; + chatRequestMessage.Content.Should().Be("result"); + chatRequestMessage.ToolCallId.Should().Be("test"); + + var toolCallMessage = msgs.First(); + toolCallMessage!.Should().BeOfType>(); + var toolCallRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)toolCallMessage!).Content; + toolCallRequestMessage.Content.Should().BeNullOrEmpty(); + toolCallRequestMessage.ToolCalls.Count().Should().Be(1); + toolCallRequestMessage.ToolCalls.First().Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)toolCallRequestMessage.ToolCalls.First(); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be("test"); + functionToolCall.Arguments.Should().Be("test"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCallMessage = new ToolCallMessage("test", "test", "assistant"); + var toolCallResultMessage = new ToolCallResultMessage("result", "test", "test", "assistant"); + var aggregateMessage = new ToolCallAggregateMessage(toolCallMessage, 
toolCallResultMessage, "assistant"); + await agent.GenerateReplyAsync([aggregateMessage]); + } + + [Fact] + public async Task ItProcessParallelFunctionCallMiddlewareMessageFromAssistantAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, _) => + { + msgs.Count().Should().Be(3); + var toolCallMessage = msgs.First(); + toolCallMessage!.Should().BeOfType>(); + var toolCallRequestMessage = (ChatRequestAssistantMessage)((MessageEnvelope)toolCallMessage!).Content; + toolCallRequestMessage.Content.Should().BeNullOrEmpty(); + toolCallRequestMessage.ToolCalls.Count().Should().Be(2); + + for (int i = 0; i < toolCallRequestMessage.ToolCalls.Count(); i++) + { + toolCallRequestMessage.ToolCalls.ElementAt(i).Should().BeOfType(); + var functionToolCall = (ChatCompletionsFunctionToolCall)toolCallRequestMessage.ToolCalls.ElementAt(i); + functionToolCall.Name.Should().Be("test"); + functionToolCall.Id.Should().Be($"test_{i}"); + functionToolCall.Arguments.Should().Be("test"); + } + + for (int i = 1; i < msgs.Count(); i++) + { + var toolCallResultMessage = msgs.ElementAt(i); + toolCallResultMessage!.Should().BeOfType>(); + var toolCallResultRequestMessage = (ChatRequestToolMessage)((MessageEnvelope)toolCallResultMessage!).Content; + toolCallResultRequestMessage.Content.Should().Be("result"); + toolCallResultRequestMessage.ToolCallId.Should().Be($"test_{i - 1}"); + } + + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(middleware); + + // user message + var toolCalls = new[] + { + new ToolCall("test", "test", "result"), + new ToolCall("test", "test", "result"), + }; + var toolCallMessage = new ToolCallMessage(toolCalls, "assistant"); + var toolCallResultMessage = new ToolCallResultMessage(toolCalls, "assistant"); + var aggregateMessage = new AggregateMessage(toolCallMessage, toolCallResultMessage, "assistant"); + await agent.GenerateReplyAsync([aggregateMessage]); + } + + [Fact] + public async Task ItConvertChatResponseMessageToTextMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // text message + var textMessage = CreateInstance(ChatRole.Assistant, "hello"); + var chatRequestMessage = MessageEnvelope.Create(textMessage); + + var message = await agent.GenerateReplyAsync([chatRequestMessage]); + message.Should().BeOfType(); + message.GetContent().Should().Be("hello"); + message.GetRole().Should().Be(Role.Assistant); + } + + [Fact] + public async Task ItConvertChatResponseMessageToToolCallMessageAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // tool call message + var toolCallMessage = CreateInstance(ChatRole.Assistant, "textContent", new[] { new ChatCompletionsFunctionToolCall("test", "test", "test") }, new FunctionCall("test", "test"), CreateInstance(), new Dictionary()); + var chatRequestMessage = MessageEnvelope.Create(toolCallMessage); + var message = await agent.GenerateReplyAsync([chatRequestMessage]); + message.Should().BeOfType(); + message.GetToolCalls()!.Count().Should().Be(1); + message.GetToolCalls()!.First().FunctionName.Should().Be("test"); + message.GetToolCalls()!.First().FunctionArguments.Should().Be("test"); + message.GetContent().Should().Be("textContent"); + } + + [Fact] + public async Task ItReturnOriginalMessageWhenStrictModeIsFalseAsync() + 
{ + var middleware = new OpenAIChatRequestMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // text message + var textMessage = "hello"; + var messageToSend = MessageEnvelope.Create(textMessage); + + var message = await agent.GenerateReplyAsync([messageToSend]); + message.Should().BeOfType>(); + } + + [Fact] + public async Task ItThrowInvalidOperationExceptionWhenStrictModeIsTrueAsync() + { + var middleware = new OpenAIChatRequestMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(middleware); + + // text message + var textMessage = new ChatRequestUserMessage("hello"); + var messageToSend = MessageEnvelope.Create(textMessage); + Func action = async () => await agent.GenerateReplyAsync([messageToSend]); + + await action.Should().ThrowAsync().WithMessage("Invalid return message type MessageEnvelope`1"); + } + + [Fact] + public void ToOpenAIChatRequestMessageShortCircuitTest() + { + var agent = new EchoAgent("assistant"); + var middleware = new OpenAIChatRequestMessageConnector(); + ChatRequestMessage[] messages = + [ + new ChatRequestUserMessage("Hello"), + new ChatRequestAssistantMessage("How can I help you?"), + new ChatRequestSystemMessage("You are a helpful AI assistant"), + new ChatRequestFunctionMessage("result", "functionName"), + new ChatRequestToolMessage("test", "test"), + ]; + + foreach (var oaiMessage in messages) + { + IMessage message = new MessageEnvelope(oaiMessage); + var oaiMessages = middleware.ProcessIncomingMessages(agent, [message]); + oaiMessages.Count().Should().Be(1); + //oaiMessages.First().Should().BeOfType>(); + if (oaiMessages.First() is IMessage chatRequestMessage) + { + chatRequestMessage.Content.Should().Be(oaiMessage); + } + else + { + // fail the test + Assert.True(false); + } + } + } + private void VerifyOAIMessages(IEnumerable<(IMessage, IEnumerable)> messages) + { + var jsonObjects = messages.Select(pair => + { + var (originalMessage, ms) = pair; + var objs = new List(); + foreach (var m in ms) + { + object? 
obj = null; + var chatRequestMessage = (m as IMessage)?.Content; + if (chatRequestMessage is ChatRequestUserMessage userMessage) + { + obj = new + { + Role = userMessage.Role.ToString(), + Content = userMessage.Content, + Name = userMessage.Name, + MultiModaItem = userMessage.MultimodalContentItems?.Select(item => + { + return item switch + { + ChatMessageImageContentItem imageContentItem => new + { + Type = "Image", + ImageUrl = GetImageUrlFromContent(imageContentItem), + } as object, + ChatMessageTextContentItem textContentItem => new + { + Type = "Text", + Text = textContentItem.Text, + } as object, + _ => throw new System.NotImplementedException(), + }; + }), + }; + } + + if (chatRequestMessage is ChatRequestAssistantMessage assistantMessage) + { + obj = new + { + Role = assistantMessage.Role.ToString(), + Content = assistantMessage.Content, + Name = assistantMessage.Name, + TooCall = assistantMessage.ToolCalls.Select(tc => + { + return tc switch + { + ChatCompletionsFunctionToolCall functionToolCall => new + { + Type = "Function", + Name = functionToolCall.Name, + Arguments = functionToolCall.Arguments, + Id = functionToolCall.Id, + } as object, + _ => throw new System.NotImplementedException(), + }; + }), + FunctionCallName = assistantMessage.FunctionCall?.Name, + FunctionCallArguments = assistantMessage.FunctionCall?.Arguments, + }; + } + + if (chatRequestMessage is ChatRequestSystemMessage systemMessage) + { + obj = new + { + Name = systemMessage.Name, + Role = systemMessage.Role.ToString(), + Content = systemMessage.Content, + }; + } + + if (chatRequestMessage is ChatRequestFunctionMessage functionMessage) + { + obj = new + { + Role = functionMessage.Role.ToString(), + Content = functionMessage.Content, + Name = functionMessage.Name, + }; + } + + if (chatRequestMessage is ChatRequestToolMessage toolCallMessage) + { + obj = new + { + Role = toolCallMessage.Role.ToString(), + Content = toolCallMessage.Content, + ToolCallId = toolCallMessage.ToolCallId, + }; + } + + objs.Add(obj ?? throw new System.NotImplementedException()); + } + + return new + { + OriginalMessage = originalMessage.ToString(), + ConvertedMessages = objs, + }; + }); + + var json = JsonSerializer.Serialize(jsonObjects, this.jsonSerializerOptions); + Approvals.Verify(json); + } + + private object? 
GetImageUrlFromContent(ChatMessageImageContentItem content) + { + return content.GetType().GetProperty("ImageUrl", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)?.GetValue(content); + } + + private static T CreateInstance(params object[] args) + { + var type = typeof(T); + var instance = type.Assembly.CreateInstance( + type.FullName!, false, + BindingFlags.Instance | BindingFlags.NonPublic, + null, args, null, null); + return (T)instance!; + } +} diff --git a/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj b/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj index 7f42b67da71..6ff942ea3ba 100644 --- a/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj +++ b/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj @@ -10,10 +10,10 @@ + - - + diff --git a/dotnet/test/AutoGen.SemanticKernel.Tests/KernelFunctionMiddlewareTests.cs b/dotnet/test/AutoGen.SemanticKernel.Tests/KernelFunctionMiddlewareTests.cs index 3e955c8ecbc..0dc2ea215dd 100644 --- a/dotnet/test/AutoGen.SemanticKernel.Tests/KernelFunctionMiddlewareTests.cs +++ b/dotnet/test/AutoGen.SemanticKernel.Tests/KernelFunctionMiddlewareTests.cs @@ -5,6 +5,7 @@ using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; using AutoGen.Tests; +using Azure; using Azure.AI.OpenAI; using FluentAssertions; using Microsoft.SemanticKernel; @@ -19,13 +20,15 @@ public async Task ItRegisterKernelFunctionMiddlewareFromTestPluginTests() var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); - var openaiClient = new OpenAIClient(new Uri(endpoint), new Azure.AzureKeyCredential(key)); + var openaiClient = new AzureOpenAIClient( + endpoint: new Uri(endpoint), + credential: new AzureKeyCredential(key)); var kernel = new Kernel(); var plugin = kernel.ImportPluginFromType(); var kernelFunctionMiddleware = new KernelPluginMiddleware(kernel, plugin); - var agent = new OpenAIChatAgent(openaiClient, "assistant", modelName: deployName) + var agent = new OpenAIChatAgent(openaiClient.GetChatClient(deployName), "assistant") .RegisterMessageConnector() .RegisterMiddleware(kernelFunctionMiddleware); @@ -63,7 +66,9 @@ public async Task ItRegisterKernelFunctionMiddlewareFromMethodTests() var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); - var openaiClient = new OpenAIClient(new Uri(endpoint), new Azure.AzureKeyCredential(key)); + var openaiClient = new AzureOpenAIClient( + endpoint: new Uri(endpoint), + credential: new AzureKeyCredential(key)); var kernel = new Kernel(); var getWeatherMethod = kernel.CreateFunctionFromMethod((string location) => $"The weather in {location} is sunny.", functionName: "GetWeather", description: "Get the weather for a location."); @@ -71,7 +76,7 @@ public async Task ItRegisterKernelFunctionMiddlewareFromMethodTests() var plugin = kernel.ImportPluginFromFunctions("plugin", [getWeatherMethod, createPersonObjectMethod]); var kernelFunctionMiddleware = new KernelPluginMiddleware(kernel, plugin); - var agent = new OpenAIChatAgent(openaiClient, "assistant", modelName: deployName) + var agent = new OpenAIChatAgent(chatClient: openaiClient.GetChatClient(deployName), "assistant") .RegisterMessageConnector() .RegisterMiddleware(kernelFunctionMiddleware); diff --git a/dotnet/test/AutoGen.SemanticKernel.Tests/SemanticKernelAgentTest.cs b/dotnet/test/AutoGen.SemanticKernel.Tests/SemanticKernelAgentTest.cs index d9dde62f1bd..dc1b655a7a4 100644 --- a/dotnet/test/AutoGen.SemanticKernel.Tests/SemanticKernelAgentTest.cs +++ b/dotnet/test/AutoGen.SemanticKernel.Tests/SemanticKernelAgentTest.cs @@ -34,8 +34,11 @@ public async Task BasicConversationTestAsync() var builder = Kernel.CreateBuilder() .AddAzureOpenAIChatCompletion(deploymentName, endpoint, key); + var kernel = builder.Build(); + kernel.GetRequiredService(); + var skAgent = new SemanticKernelAgent(kernel, "assistant"); var chatMessageContent = MessageEnvelope.Create(new ChatMessageContent(AuthorRole.Assistant, "Hello")); @@ -223,11 +226,10 @@ public async Task SkChatCompletionAgentPluginTestAsync() Kernel = kernel, Name = "assistant", Instructions = "You are a helpful AI assistant", - ExecutionSettings = - new OpenAIPromptExecutionSettings() - { - ToolCallBehavior = ToolCallBehavior.AutoInvokeKernelFunctions - } + Arguments = new KernelArguments(new OpenAIPromptExecutionSettings() + { + ToolCallBehavior = ToolCallBehavior.AutoInvokeKernelFunctions + }) }; var skAgent = new SemanticKernelChatCompletionAgent(agent).RegisterMiddleware( diff --git a/dotnet/test/AutoGen.SourceGenerator.Tests/FunctionExample.test.cs b/dotnet/test/AutoGen.SourceGenerator.Tests/FunctionExample.test.cs index 0096f2c157c..8b477446d9f 100644 --- a/dotnet/test/AutoGen.SourceGenerator.Tests/FunctionExample.test.cs +++ b/dotnet/test/AutoGen.SourceGenerator.Tests/FunctionExample.test.cs @@ -6,8 +6,8 @@ using ApprovalTests.Namers; using ApprovalTests.Reporters; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; using FluentAssertions; +using OpenAI.Chat; using Xunit; namespace AutoGen.SourceGenerator.Tests @@ -30,7 +30,7 @@ public void Add_Test() }; this.VerifyFunction(functionExamples.AddWrapper, args, 3); - this.VerifyFunctionDefinition(functionExamples.AddFunctionContract.ToOpenAIFunctionDefinition()); + this.VerifyFunctionDefinition(functionExamples.AddFunctionContract.ToChatTool()); } [Fact] @@ -42,7 +42,7 @@ public void Sum_Test() }; this.VerifyFunction(functionExamples.SumWrapper, args, 6.0); - this.VerifyFunctionDefinition(functionExamples.SumFunctionContract.ToOpenAIFunctionDefinition()); + this.VerifyFunctionDefinition(functionExamples.SumFunctionContract.ToChatTool()); } [Fact] @@ -58,7 +58,7 @@ public async Task DictionaryToString_Test() }; await 
this.VerifyAsyncFunction(functionExamples.DictionaryToStringAsyncWrapper, args, JsonSerializer.Serialize(args.xargs, jsonSerializerOptions)); - this.VerifyFunctionDefinition(functionExamples.DictionaryToStringAsyncFunctionContract.ToOpenAIFunctionDefinition()); + this.VerifyFunctionDefinition(functionExamples.DictionaryToStringAsyncFunctionContract.ToChatTool()); } [Fact] @@ -97,18 +97,18 @@ public void Query_Test() }; this.VerifyFunction(functionExamples.QueryWrapper, args, new[] { "hello", "hello", "hello" }); - this.VerifyFunctionDefinition(functionExamples.QueryFunctionContract.ToOpenAIFunctionDefinition()); + this.VerifyFunctionDefinition(functionExamples.QueryFunctionContract.ToChatTool()); } [UseReporter(typeof(DiffReporter))] [UseApprovalSubdirectory("ApprovalTests")] - private void VerifyFunctionDefinition(FunctionDefinition function) + private void VerifyFunctionDefinition(ChatTool function) { var func = new { - name = function.Name, - description = function.Description.Replace(Environment.NewLine, ","), - parameters = function.Parameters.ToObjectFromJson(options: jsonSerializerOptions), + name = function.FunctionName, + description = function.FunctionDescription.Replace(Environment.NewLine, ","), + parameters = function.FunctionParameters.ToObjectFromJson(options: jsonSerializerOptions), }; Approvals.Verify(JsonSerializer.Serialize(func, jsonSerializerOptions)); diff --git a/dotnet/test/AutoGen.Test.Share/Attribute/EnvironmentSpecificFactAttribute.cs b/dotnet/test/AutoGen.Test.Share/Attribute/EnvironmentSpecificFactAttribute.cs new file mode 100644 index 00000000000..1361531cc9e --- /dev/null +++ b/dotnet/test/AutoGen.Test.Share/Attribute/EnvironmentSpecificFactAttribute.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// EnvironmentSpecificFactAttribute.cs + +using Xunit; + +namespace AutoGen.Tests; + +/// +/// A base class for environment-specific fact attributes. +/// +[AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = true)] +public abstract class EnvironmentSpecificFactAttribute : FactAttribute +{ + private readonly string _skipMessage; + + /// + /// Creates a new instance of the class. + /// + /// The message to be used when skipping the test marked with this attribute. + protected EnvironmentSpecificFactAttribute(string skipMessage) + { + _skipMessage = skipMessage ?? throw new ArgumentNullException(nameof(skipMessage)); + } + + public sealed override string Skip => IsEnvironmentSupported() ? string.Empty : _skipMessage; + + /// + /// A method used to evaluate whether to skip a test marked with this attribute. Skips iff this method evaluates to false. + /// + protected abstract bool IsEnvironmentSupported(); +} diff --git a/dotnet/test/AutoGen.Test.Share/Attribute/OpenAIFact.cs b/dotnet/test/AutoGen.Test.Share/Attribute/OpenAIFact.cs new file mode 100644 index 00000000000..54d72cd61ab --- /dev/null +++ b/dotnet/test/AutoGen.Test.Share/Attribute/OpenAIFact.cs @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// OpenAIFact.cs + +namespace AutoGen.Tests; + +/// +/// A fact for tests requiring OPENAI_API_KEY env. 
+/// +public sealed class ApiKeyFactAttribute : EnvironmentSpecificFactAttribute +{ + private readonly string[] _envVariableNames; + public ApiKeyFactAttribute(params string[] envVariableNames) : base($"{envVariableNames} is not found in env") + { + _envVariableNames = envVariableNames; + } + + /// + protected override bool IsEnvironmentSupported() + { + return _envVariableNames.All(Environment.GetEnvironmentVariables().Contains); + } +} diff --git a/dotnet/test/AutoGen.Test.Share/AutoGen.Tests.Share.csproj b/dotnet/test/AutoGen.Test.Share/AutoGen.Tests.Share.csproj new file mode 100644 index 00000000000..21c71896ddc --- /dev/null +++ b/dotnet/test/AutoGen.Test.Share/AutoGen.Tests.Share.csproj @@ -0,0 +1,15 @@ + + + + $(TestTargetFrameworks) + enable + false + True + enable + + + + + + + diff --git a/dotnet/test/AutoGen.Test.Share/EchoAgent.cs b/dotnet/test/AutoGen.Test.Share/EchoAgent.cs new file mode 100644 index 00000000000..010b72d2add --- /dev/null +++ b/dotnet/test/AutoGen.Test.Share/EchoAgent.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// EchoAgent.cs + +using System.Runtime.CompilerServices; +using AutoGen.Core; + +namespace AutoGen.Tests; + +public class EchoAgent : IStreamingAgent +{ + public EchoAgent(string name) + { + Name = name; + } + public string Name { get; } + + public Task GenerateReplyAsync( + IEnumerable conversation, + GenerateReplyOptions? options = null, + CancellationToken ct = default) + { + // return the most recent message + var lastMessage = conversation.Last(); + lastMessage.From = this.Name; + + return Task.FromResult(lastMessage); + } + + public async IAsyncEnumerable GenerateStreamingReplyAsync(IEnumerable messages, GenerateReplyOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + foreach (var message in messages) + { + message.From = this.Name; + yield return message; + } + } +} diff --git a/dotnet/test/AutoGen.Tests/Attribute/EnvironmentSpecificFactAttribute.cs b/dotnet/test/AutoGen.Tests/Attribute/EnvironmentSpecificFactAttribute.cs deleted file mode 100644 index 1042dec6f27..00000000000 --- a/dotnet/test/AutoGen.Tests/Attribute/EnvironmentSpecificFactAttribute.cs +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// EnvironmentSpecificFactAttribute.cs - -using System; -using Xunit; - -namespace AutoGen.Tests -{ - /// - /// A base class for environment-specific fact attributes. - /// - [AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = true)] - public abstract class EnvironmentSpecificFactAttribute : FactAttribute - { - private readonly string _skipMessage; - - /// - /// Creates a new instance of the class. - /// - /// The message to be used when skipping the test marked with this attribute. - protected EnvironmentSpecificFactAttribute(string skipMessage) - { - _skipMessage = skipMessage ?? throw new ArgumentNullException(nameof(skipMessage)); - } - - public sealed override string Skip => IsEnvironmentSupported() ? string.Empty : _skipMessage; - - /// - /// A method used to evaluate whether to skip a test marked with this attribute. Skips iff this method evaluates to false. 
- /// - protected abstract bool IsEnvironmentSupported(); - } -} diff --git a/dotnet/test/AutoGen.Tests/Attribute/OpenAIFact.cs b/dotnet/test/AutoGen.Tests/Attribute/OpenAIFact.cs deleted file mode 100644 index 44457d8f571..00000000000 --- a/dotnet/test/AutoGen.Tests/Attribute/OpenAIFact.cs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// OpenAIFact.cs - -using System; -using System.Linq; - -namespace AutoGen.Tests -{ - /// - /// A fact for tests requiring OPENAI_API_KEY env. - /// - public sealed class ApiKeyFactAttribute : EnvironmentSpecificFactAttribute - { - private readonly string[] _envVariableNames; - public ApiKeyFactAttribute(params string[] envVariableNames) : base($"{envVariableNames} is not found in env") - { - _envVariableNames = envVariableNames; - } - - /// - protected override bool IsEnvironmentSupported() - { - return _envVariableNames.All(Environment.GetEnvironmentVariables().Contains); - } - } -} diff --git a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj index ce968b91f55..a0c3b815f22 100644 --- a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj +++ b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj @@ -12,6 +12,7 @@ + diff --git a/dotnet/test/AutoGen.Tests/BasicSampleTest.cs b/dotnet/test/AutoGen.Tests/BasicSampleTest.cs index 89925b7d3b3..317fdc36e01 100644 --- a/dotnet/test/AutoGen.Tests/BasicSampleTest.cs +++ b/dotnet/test/AutoGen.Tests/BasicSampleTest.cs @@ -31,7 +31,7 @@ public async Task TwoAgentMathClassTestAsync() await Example02_TwoAgent_MathChat.RunAsync(); } - [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] + [ApiKeyFact("OPENAI_API_KEY")] public async Task AgentFunctionCallTestAsync() { await Example03_Agent_FunctionCall.RunAsync(); diff --git a/dotnet/test/AutoGen.Tests/EchoAgent.cs b/dotnet/test/AutoGen.Tests/EchoAgent.cs deleted file mode 100644 index af5490218e8..00000000000 --- a/dotnet/test/AutoGen.Tests/EchoAgent.cs +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// EchoAgent.cs - -using System.Collections.Generic; -using System.Linq; -using System.Runtime.CompilerServices; -using System.Threading; -using System.Threading.Tasks; - -namespace AutoGen.Tests -{ - public class EchoAgent : IStreamingAgent - { - public EchoAgent(string name) - { - Name = name; - } - public string Name { get; } - - public Task GenerateReplyAsync( - IEnumerable conversation, - GenerateReplyOptions? options = null, - CancellationToken ct = default) - { - // return the most recent message - var lastMessage = conversation.Last(); - lastMessage.From = this.Name; - - return Task.FromResult(lastMessage); - } - - public async IAsyncEnumerable GenerateStreamingReplyAsync(IEnumerable messages, GenerateReplyOptions? 
options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) - { - foreach (var message in messages) - { - message.From = this.Name; - yield return message; - } - } - } -} diff --git a/dotnet/test/AutoGen.Tests/MiddlewareTest.cs b/dotnet/test/AutoGen.Tests/MiddlewareTest.cs index 6c1c89a33c1..6398a24f5c5 100644 --- a/dotnet/test/AutoGen.Tests/MiddlewareTest.cs +++ b/dotnet/test/AutoGen.Tests/MiddlewareTest.cs @@ -6,7 +6,6 @@ using System.Linq; using System.Text.Json; using System.Threading.Tasks; -using Azure.AI.OpenAI; using FluentAssertions; using Xunit; @@ -73,7 +72,7 @@ public async Task FunctionCallMiddlewareTestAsync() var agent = new EchoAgent("echo"); var args = new EchoSchema { message = "hello" }; var argsJson = JsonSerializer.Serialize(args) ?? throw new InvalidOperationException("Failed to serialize args"); - var functionCall = new FunctionCall("echo", argsJson); + var functionCall = new ToolCall("echo", argsJson); var functionCallAgent = agent.RegisterMiddleware(async (messages, options, agent, ct) => { if (options?.Functions is null) @@ -81,7 +80,7 @@ public async Task FunctionCallMiddlewareTestAsync() return await agent.GenerateReplyAsync(messages, options, ct); } - return new ToolCallMessage(functionCall.Name, functionCall.Arguments, from: agent.Name); + return new ToolCallMessage(functionCall.FunctionName, functionCall.FunctionArguments, from: agent.Name); }); // test 1 @@ -90,7 +89,7 @@ public async Task FunctionCallMiddlewareTestAsync() functionMap: new Dictionary>> { { "echo", EchoWrapper } }); var testAgent = agent.RegisterMiddleware(mw); - var functionCallMessage = new ToolCallMessage(functionCall.Name, functionCall.Arguments, from: "user"); + var functionCallMessage = new ToolCallMessage(functionCall.FunctionName, functionCall.FunctionArguments, from: "user"); var reply = await testAgent.SendAsync(functionCallMessage); reply.Should().BeOfType(); reply.GetContent()!.Should().Be("[FUNC] hello"); diff --git a/dotnet/test/AutoGen.Tests/Orchestrator/RolePlayOrchestratorTests.cs b/dotnet/test/AutoGen.Tests/Orchestrator/RolePlayOrchestratorTests.cs index 5a2cebb66cf..d4d602d8491 100644 --- a/dotnet/test/AutoGen.Tests/Orchestrator/RolePlayOrchestratorTests.cs +++ b/dotnet/test/AutoGen.Tests/Orchestrator/RolePlayOrchestratorTests.cs @@ -10,14 +10,18 @@ using AutoGen.Anthropic; using AutoGen.Anthropic.Extensions; using AutoGen.Anthropic.Utils; +using AutoGen.AzureAIInference; +using AutoGen.AzureAIInference.Extension; using AutoGen.Gemini; using AutoGen.Mistral; using AutoGen.Mistral.Extension; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; +using Azure.AI.Inference; using Azure.AI.OpenAI; using FluentAssertions; using Moq; +using OpenAI; using Xunit; namespace AutoGen.Tests; @@ -218,11 +222,10 @@ public async Task GPT_3_5_CoderReviewerRunnerTestAsync() var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new Exception("Please set AZURE_OPENAI_ENDPOINT environment variable."); var key = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? throw new Exception("Please set AZURE_OPENAI_API_KEY environment variable."); var deployName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOY_NAME") ?? 
throw new Exception("Please set AZURE_OPENAI_DEPLOY_NAME environment variable."); - var openaiClient = new OpenAIClient(new Uri(endpoint), new Azure.AzureKeyCredential(key)); + var openaiClient = new AzureOpenAIClient(new Uri(endpoint), new System.ClientModel.ApiKeyCredential(key)); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: deployName) + chatClient: openaiClient.GetChatClient(deployName), + name: "assistant") .RegisterMessageConnector(); await CoderReviewerRunnerTestAsync(openAIChatAgent); @@ -231,13 +234,12 @@ public async Task GPT_3_5_CoderReviewerRunnerTestAsync() [ApiKeyFact("OPENAI_API_KEY")] public async Task GPT_4o_CoderReviewerRunnerTestAsync() { - var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY"); + var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("OPENAI_API_KEY is not set"); var model = "gpt-4o"; var openaiClient = new OpenAIClient(apiKey); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: model) + chatClient: openaiClient.GetChatClient(model), + name: "assistant") .RegisterMessageConnector(); await CoderReviewerRunnerTestAsync(openAIChatAgent); @@ -246,13 +248,12 @@ public async Task GPT_4o_CoderReviewerRunnerTestAsync() [ApiKeyFact("OPENAI_API_KEY")] public async Task GPT_4o_mini_CoderReviewerRunnerTestAsync() { - var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY"); + var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("OPENAI_API_KEY is not set"); var model = "gpt-4o-mini"; var openaiClient = new OpenAIClient(apiKey); var openAIChatAgent = new OpenAIChatAgent( - openAIClient: openaiClient, - name: "assistant", - modelName: model) + chatClient: openaiClient.GetChatClient(model), + name: "assistant") .RegisterMessageConnector(); await CoderReviewerRunnerTestAsync(openAIChatAgent); @@ -304,6 +305,22 @@ public async Task Mistra_7b_CoderReviewerRunnerTestAsync() await CoderReviewerRunnerTestAsync(agent); } + [ApiKeyFact("GH_API_KEY")] + public async Task LLaMA_3_1_CoderReviewerRunnerTestAsync() + { + var apiKey = Environment.GetEnvironmentVariable("GH_API_KEY") ?? throw new InvalidOperationException("GH_API_KEY is not set."); + var endPoint = "https://models.inference.ai.azure.com"; + + var chatCompletionClient = new ChatCompletionsClient(new Uri(endPoint), new Azure.AzureKeyCredential(apiKey)); + var agent = new ChatCompletionsClientAgent( + chatCompletionsClient: chatCompletionClient, + name: "assistant", + modelName: "Meta-Llama-3.1-70B-Instruct") + .RegisterMessageConnector(); + + await CoderReviewerRunnerTestAsync(agent); + } + /// /// This test is to mimic the conversation among coder, reviewer and runner. /// The coder will write the code, the reviewer will review the code, and the runner will run the code. 
diff --git a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs index b545bbdbe86..fb28f48e12d 100644 --- a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs +++ b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs @@ -3,13 +3,8 @@ using System; using System.Collections.Generic; -using System.IO; using System.Linq; using System.Threading.Tasks; -using AutoGen.LMStudio; -using AutoGen.OpenAI; -using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; using FluentAssertions; using Xunit; using Xunit.Abstractions; @@ -38,90 +33,6 @@ private ILLMConfig CreateOpenAIGPT4VisionConfig() return new OpenAIConfig(key, "gpt-4-vision-preview"); } - [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] - public async Task GPTAgentTestAsync() - { - var config = this.CreateAzureOpenAIGPT35TurboConfig(); - - var agent = new GPTAgent("gpt", "You are a helpful AI assistant", config); - - await UpperCaseTestAsync(agent); - await UpperCaseStreamingTestAsync(agent); - } - - [ApiKeyFact("OPENAI_API_KEY", "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT")] - public async Task GPTAgentVisionTestAsync() - { - var visionConfig = this.CreateOpenAIGPT4VisionConfig(); - var visionAgent = new GPTAgent( - name: "gpt", - systemMessage: "You are a helpful AI assistant", - config: visionConfig, - temperature: 0); - - var gpt3Config = this.CreateAzureOpenAIGPT35TurboConfig(); - var gpt3Agent = new GPTAgent( - name: "gpt3", - systemMessage: "You are a helpful AI assistant, return highest label from conversation", - config: gpt3Config, - temperature: 0, - functions: new[] { this.GetHighestLabelFunctionContract.ToOpenAIFunctionDefinition() }, - functionMap: new Dictionary>> - { - { nameof(GetHighestLabel), this.GetHighestLabelWrapper }, - }); - - var imageUri = new Uri(@"https://microsoft.github.io/autogen/assets/images/level2algebra-659ba95286432d9945fc89e84d606797.png"); - var oaiMessage = new ChatRequestUserMessage( - new ChatMessageTextContentItem("which label has the highest inference cost"), - new ChatMessageImageContentItem(imageUri)); - var multiModalMessage = new MultiModalMessage(Role.User, - [ - new TextMessage(Role.User, "which label has the highest inference cost", from: "user"), - new ImageMessage(Role.User, imageUri, from: "user"), - ], - from: "user"); - - var imageMessage = new ImageMessage(Role.User, imageUri, from: "user"); - - string imagePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ApprovalTests", "square.png"); - ImageMessage imageMessageData; - using (var fs = new FileStream(imagePath, FileMode.Open, FileAccess.Read)) - { - var ms = new MemoryStream(); - await fs.CopyToAsync(ms); - ms.Seek(0, SeekOrigin.Begin); - var imageData = await BinaryData.FromStreamAsync(ms, "image/png"); - imageMessageData = new ImageMessage(Role.Assistant, imageData, from: "user"); - } - - IMessage[] messages = [ - MessageEnvelope.Create(oaiMessage), - multiModalMessage, - imageMessage, - imageMessageData - ]; - - foreach (var message in messages) - { - var response = await visionAgent.SendAsync(message); - response.From.Should().Be(visionAgent.Name); - - var labelResponse = await gpt3Agent.SendAsync(response); - labelResponse.From.Should().Be(gpt3Agent.Name); - labelResponse.GetToolCalls()!.First().FunctionName.Should().Be(nameof(GetHighestLabel)); - } - } - - [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] - public async Task GPTFunctionCallAgentTestAsync() - { - var config = 
this.CreateAzureOpenAIGPT35TurboConfig(); - var agentWithFunction = new GPTAgent("gpt", "You are a helpful AI assistant", config, 0, functions: new[] { this.EchoAsyncFunctionContract.ToOpenAIFunctionDefinition() }); - - await EchoFunctionCallTestAsync(agentWithFunction); - } - [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] public async Task AssistantAgentFunctionCallTestAsync() { @@ -147,43 +58,6 @@ public async Task AssistantAgentFunctionCallTestAsync() await EchoFunctionCallTestAsync(assistantAgent); } - [Fact] - public async Task ItCreateAssistantAgentFromLMStudioConfigAsync() - { - var host = "http://localhost"; - var port = 8080; - var lmStudioConfig = new LMStudioConfig(host, port); - - var assistantAgent = new AssistantAgent( - name: "assistant", - llmConfig: new ConversableAgentConfig() - { - ConfigList = [lmStudioConfig], - }); - - assistantAgent.Name.Should().Be("assistant"); - assistantAgent.InnerAgent.Should().BeOfType(); - } - - [ApiKeyFact("LMStudio_ENDPOINT")] - public async Task ItTestAssistantAgentFromLMStudioConfigAsync() - { - var Uri = Environment.GetEnvironmentVariable("LMStudio_ENDPOINT") ?? throw new ArgumentException("LMStudio_ENDPOINT is not set"); - var lmStudioConfig = new LMStudioConfig(new Uri(Uri)); - - var assistantAgent = new AssistantAgent( - name: "assistant", - llmConfig: new ConversableAgentConfig() - { - ConfigList = [lmStudioConfig], - }); - - assistantAgent.Name.Should().Be("assistant"); - assistantAgent.InnerAgent.Should().BeOfType(); - await this.UpperCaseTestAsync(assistantAgent); - } - - [Fact] public async Task AssistantAgentDefaultReplyTestAsync() { @@ -225,25 +99,6 @@ public async Task AssistantAgentFunctionCallSelfExecutionTestAsync() await EchoFunctionCallExecutionTestAsync(assistantAgent); } - [ApiKeyFact("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_DEPLOY_NAME")] - public async Task GPTAgentFunctionCallSelfExecutionTestAsync() - { - var config = this.CreateAzureOpenAIGPT35TurboConfig(); - var agent = new GPTAgent( - name: "gpt", - systemMessage: "You are a helpful AI assistant", - config: config, - temperature: 0, - functions: new[] { this.EchoAsyncFunctionContract.ToOpenAIFunctionDefinition() }, - functionMap: new Dictionary>> - { - { nameof(EchoAsync), this.EchoAsyncWrapper }, - }); - - await EchoFunctionCallExecutionStreamingTestAsync(agent); - await EchoFunctionCallExecutionTestAsync(agent); - } - /// /// echo when asked. /// diff --git a/dotnet/test/AutoGen.Tests/TwoAgentTest.cs b/dotnet/test/AutoGen.Tests/TwoAgentTest.cs index 100a22c04a7..335f4aaa57c 100644 --- a/dotnet/test/AutoGen.Tests/TwoAgentTest.cs +++ b/dotnet/test/AutoGen.Tests/TwoAgentTest.cs @@ -5,7 +5,6 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; -using AutoGen.OpenAI; using FluentAssertions; using Xunit.Abstractions; diff --git a/dotnet/test/AutoGen.WebAPI.Tests/OpenAIChatCompletionMiddlewareTests.cs b/dotnet/test/AutoGen.WebAPI.Tests/OpenAIChatCompletionMiddlewareTests.cs index 07bdc850936..c56bbf98350 100644 --- a/dotnet/test/AutoGen.WebAPI.Tests/OpenAIChatCompletionMiddlewareTests.cs +++ b/dotnet/test/AutoGen.WebAPI.Tests/OpenAIChatCompletionMiddlewareTests.cs @@ -1,16 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// OpenAIChatCompletionMiddlewareTests.cs +using System.ClientModel.Primitives; using AutoGen.Core; using AutoGen.OpenAI; using AutoGen.OpenAI.Extension; -using Azure.AI.OpenAI; -using Azure.Core.Pipeline; using FluentAssertions; using Microsoft.AspNetCore.Hosting; using Microsoft.AspNetCore.TestHost; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; +using OpenAI; namespace AutoGen.WebAPI.Tests; @@ -24,7 +24,7 @@ public async Task ItReturnTextMessageWhenSendTextMessage() using var host = await hostBuilder.StartAsync(); var client = host.GetTestClient(); var openaiClient = CreateOpenAIClient(client); - var openAIAgent = new OpenAIChatAgent(openaiClient, "test", "test") + var openAIAgent = new OpenAIChatAgent(openaiClient.GetChatClient("test"), "test") .RegisterMessageConnector(); var response = await openAIAgent.SendAsync("Hey"); @@ -42,7 +42,7 @@ public async Task ItReturnTextMessageWhenSendTextMessageUseStreaming() using var host = await hostBuilder.StartAsync(); var client = host.GetTestClient(); var openaiClient = CreateOpenAIClient(client); - var openAIAgent = new OpenAIChatAgent(openaiClient, "test", "test") + var openAIAgent = new OpenAIChatAgent(openaiClient.GetChatClient("test"), "test") .RegisterMessageConnector(); var message = new TextMessage(Role.User, "ABCDEFGHIJKLMN"); @@ -73,10 +73,9 @@ private IHostBuilder CreateHostBuilder(IAgent agent) private OpenAIClient CreateOpenAIClient(HttpClient client) { - var clientOption = new OpenAIClientOptions(OpenAIClientOptions.ServiceVersion.V2024_02_15_Preview) + return new OpenAIClient("api-key", new OpenAIClientOptions { - Transport = new HttpClientTransport(client), - }; - return new OpenAIClient("api-key", clientOption); + Transport = new HttpClientPipelineTransport(client), + }); } } diff --git a/dotnet/website/articles/Agent-overview.md b/dotnet/website/articles/Agent-overview.md index 0b84cdc49ac..586d231a6e7 100644 --- a/dotnet/website/articles/Agent-overview.md +++ b/dotnet/website/articles/Agent-overview.md @@ -8,7 +8,6 @@ - Create an @AutoGen.OpenAI.OpenAIChatAgent: [Create an OpenAI chat agent](./OpenAIChatAgent-simple-chat.md) - Create a @AutoGen.SemanticKernel.SemanticKernelAgent: [Create a semantic kernel agent](./AutoGen.SemanticKernel/SemanticKernelAgent-simple-chat.md) - Create a @AutoGen.LMStudio.LMStudioAgent: [Connect to LM Studio](./Consume-LLM-server-from-LM-Studio.md) -- Create your own agent: [Create your own agent](./Create-your-own-agent.md) ## Chat with an agent To chat with an agent, typically you can invoke @AutoGen.Core.IAgent.GenerateReplyAsync*. On top of that, you can also use one of the extension methods like @AutoGen.Core.AgentExtension.SendAsync* as shortcuts. diff --git a/dotnet/website/articles/Installation.md b/dotnet/website/articles/Installation.md index 3ec5d3a470f..30b55442d24 100644 --- a/dotnet/website/articles/Installation.md +++ b/dotnet/website/articles/Installation.md @@ -13,8 +13,9 @@ AutoGen.Net provides the following packages, you can choose to install one or mo - `AutoGen.LMStudio`: This package provides the integration agents from LM Studio. - `AutoGen.SemanticKernel`: This package provides the integration agents over semantic kernel. - `AutoGen.Gemini`: This package provides the integration agents from [Google Gemini](https://gemini.google.com/). +- `AutoGen.AzureAIInference`: This package provides the integration agents for [Azure AI Inference](https://www.nuget.org/packages/Azure.AI.Inference). 
- `AutoGen.SourceGenerator`: This package carries a source generator that adds support for type-safe function definition generation. -- `AutoGen.DotnetInteractive`: This packages carries dotnet interactive support to execute dotnet code snippet. +- `AutoGen.DotnetInteractive`: This package carries dotnet interactive support to execute code snippets. The currently supported languages are C#, F#, PowerShell and Python. >[!Note] > Help me choose diff --git a/dotnet/website/articles/Run-dotnet-code.md b/dotnet/website/articles/Run-dotnet-code.md index e3d8fa78a0b..bee7e1aa3bb 100644 --- a/dotnet/website/articles/Run-dotnet-code.md +++ b/dotnet/website/articles/Run-dotnet-code.md @@ -16,17 +16,46 @@ For example, in data analysis scenario, agent can resolve tasks like "What is th > [!WARNING] > Running arbitrary code snippet from agent response could bring risks to your system. Using this feature with caution. -## How to run dotnet code snippet? +## Use the dotnet interactive kernel to execute code snippets The built-in feature of running dotnet code snippet is provided by [dotnet-interactive](https://github.com/dotnet/interactive). To run dotnet code snippet, you need to install the following package to your project, which provides the intergraion with dotnet-interactive: ```xml ``` -Then you can use @AutoGen.DotnetInteractive.AgentExtension.RegisterDotnetCodeBlockExectionHook(AutoGen.IAgent,InteractiveService,System.String,System.String) to register a `reply hook` to run dotnet code snippet. The hook will check if a csharp code snippet is present in the most recent message from history, and run the code snippet if it is present. - -The following code snippet shows how to register a dotnet code snippet execution hook: - -[!code-csharp[](../../sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs?name=code_snippet_0_1)] +Then you can use @AutoGen.DotnetInteractive.DotnetInteractiveKernelBuilder* to create an in-process dotnet-interactive composite kernel with C# and F# kernels. [!code-csharp[](../../sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs?name=code_snippet_1_1)] + +After that, use the @AutoGen.DotnetInteractive.Extension.RunSubmitCodeCommandAsync* method to run a code snippet. The method returns the result of the code snippet. [!code-csharp[](../../sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs?name=code_snippet_1_2)] + +## Run python code snippet +To run Python code, you first need to have Python installed on your machine, and then set up ipykernel and jupyter in your environment. + +```bash +pip install ipykernel +pip install jupyter +``` + +After `ipykernel` and `jupyter` are installed, you can confirm that ipykernel is installed correctly by running the following command: + +```bash +jupyter kernelspec list +``` + +The output should contain all available kernels, including `python3`. + +```bash +Available kernels: + python3 /usr/local/share/jupyter/kernels/python3 + ... +``` + +Then you can add the python kernel to the dotnet-interactive composite kernel by calling the `AddPythonKernel` method.
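For orientation, the sketch below strings together the pieces this page names (`DotnetInteractiveKernelBuilder`, `AddPythonKernel`, and `RunSubmitCodeCommandAsync`). The builder entry-point name, the `Build()` call, the kernel-name strings, and the exact method signature are assumptions for illustration only and are not taken from this PR; the project's own `code_snippet_1_4` sample follows right after.

```csharp
// Hedged sketch only: CreateDefaultInProcessKernelBuilder, Build(), the kernel
// names ("csharp", "python", "python3") and the RunSubmitCodeCommandAsync
// signature are assumptions, not confirmed by this PR.
using System;
using System.Threading.Tasks;
using AutoGen.DotnetInteractive;
using AutoGen.DotnetInteractive.Extension;

public static class RunSnippetSketch
{
    public static async Task RunAsync()
    {
        // Build an in-process composite kernel with C#, F#, and a python kernel.
        // AddPythonKernel requires the ipykernel/jupyter setup described above.
        var kernel = DotnetInteractiveKernelBuilder
            .CreateDefaultInProcessKernelBuilder() // assumed entry-point name
            .AddPythonKernel("python3")            // assumed kernel spec name
            .Build();

        // Run a C# snippet, then a Python snippet, and print the returned results.
        var csharpResult = await kernel.RunSubmitCodeCommandAsync("1 + 1", "csharp");
        Console.WriteLine(csharpResult);

        var pythonResult = await kernel.RunSubmitCodeCommandAsync("print(1 + 1)", "python");
        Console.WriteLine(pythonResult);
    }
}
```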
+ +[!code-csharp[](../../sample/AutoGen.BasicSamples/CodeSnippet/RunCodeSnippetCodeSnippet.cs?name=code_snippet_1_4)] + +## Further reading +You can refer to the following examples for running code snippets in an agentic workflow: +- Dynamic_GroupChat_Coding_Task: [![](https://img.shields.io/badge/Open%20on%20Github-grey?logo=github)](https://github.com/microsoft/autogen/blob/main/dotnet/sample/AutoGen.BasicSample/Example04_Dynamic_GroupChat_Coding_Task.cs) +- Dynamic_GroupChat_Calculate_Fibonacci: [![](https://img.shields.io/badge/Open%20on%20Github-grey?logo=github)](https://github.com/microsoft/autogen/blob/main/dotnet/sample/AutoGen.BasicSample/Example07_Dynamic_GroupChat_Calculate_Fibonacci.cs) diff --git a/dotnet/website/articles/function-comparison-page-between-python-AutoGen-and-autogen.net.md b/dotnet/website/articles/function-comparison-page-between-python-AutoGen-and-autogen.net.md new file mode 100644 index 00000000000..e81b96f11be --- /dev/null +++ b/dotnet/website/articles/function-comparison-page-between-python-AutoGen-and-autogen.net.md @@ -0,0 +1,37 @@ +### Function comparison between Python AutoGen and AutoGen\.Net + + +#### Agentic pattern + +| Feature | AutoGen | AutoGen\.Net | +| :---------------- | :------ | :---- | +| Code interpreter | run python code in local/docker/notebook executor | run csharp code in dotnet interactive executor | +| Single agent chat pattern | ✔️ | ✔️ | +| Two agent chat pattern | ✔️ | ✔️ | +| Group chat (including FSM)| ✔️ | ✔️ (using workflow for FSM group chat) | +| Nested chat| ✔️ | ✔️ (using middleware pattern)| +|Sequential chat | ✔️ | ❌ (need to manually create the task in code) | +| Tool | ✔️ | ✔️ | + + +#### LLM platform support + +ℹ️ Note + +``` Other than the platforms listed below, AutoGen.Net also supports all the platforms that Semantic Kernel supports via AutoGen.SemanticKernel as a bridge ``` + +| Feature | AutoGen | AutoGen\.Net | +| :---------------- | :------ | :---- | +| OpenAI (including third-party) | ✔️ | ✔️ | +| Mistral | ✔️| ✔️| +| Ollama | ✔️| ✔️| +|Claude |✔️ |✔️| +|Gemini (including Vertex) | ✔️ | ✔️ | + +#### Popular Contrib Agent support + + +| Feature | AutoGen | AutoGen\.Net | +| :---------------- | :------ | :---- | +| RAG Agent | ✔️| ❌ | +| Web surfer | ✔️| ❌ | diff --git a/dotnet/website/release_note/0.1.0.md b/dotnet/website/release_note/0.1.0.md new file mode 100644 index 00000000000..dc844087758 --- /dev/null +++ b/dotnet/website/release_note/0.1.0.md @@ -0,0 +1,41 @@ +# 🎉 Release Notes: AutoGen.Net 0.1.0 🎉 + +## 📦 New Packages + +1. **Add AutoGen.AzureAIInference Package** + - **Issue**: [.Net][Feature Request] [#3323](https://github.com/microsoft/autogen/issues/3323) + - **Description**: The new `AutoGen.AzureAIInference` package includes the `ChatCompletionClientAgent`. + +## ✨ New Features + +1. **Enable Step-by-Step Execution for Two Agent Chat API** + - **Issue**: [.Net][Feature Request] [#3339](https://github.com/microsoft/autogen/issues/3339) + - **Description**: The `AgentExtension.SendAsync` method now returns an `IAsyncEnumerable`, allowing conversations to be driven step by step, similar to how `GroupChatExtension.SendAsync` works. + +2. **Support Python Code Execution in AutoGen.DotnetInteractive** + - **Issue**: [.Net][Feature Request] [#3316](https://github.com/microsoft/autogen/issues/3316) + - **Description**: `dotnet-interactive` now supports Jupyter kernel connection, allowing Python code execution in `AutoGen.DotnetInteractive`. + +3.
**Support Prompt Cache in Claude** + - **Issue**: [.Net][Feature Request] [#3359](https://github.com/microsoft/autogen/issues/3359) + - **Description**: Claude now supports prompt caching, which dramatically lowers the bill if the cache is hit. Added the corresponding option in the Claude client. + +## 🐛 Bug Fixes + +1. **GroupChatExtension.SendAsync Doesn’t Terminate Chat When `IOrchestrator` Returns Null as Next Agent** + - **Issue**: [.Net][Bug] [#3306](https://github.com/microsoft/autogen/issues/3306) + - **Description**: Fixed an issue where `GroupChatExtension.SendAsync` would continue until the max_round is reached even when `IOrchestrator` returns null as the next speaker. + +2. **InitializedMessages Are Added Repeatedly in GroupChatExtension.SendAsync Method** + - **Issue**: [.Net][Bug] [#3268](https://github.com/microsoft/autogen/issues/3268) + - **Description**: Fixed an issue where initialized messages from group chat were being added repeatedly in every iteration of the `GroupChatExtension.SendAsync` API. + +3. **Remove `Azure.AI.OpenAI` Dependency from `AutoGen.DotnetInteractive`** + - **Issue**: [.Net][Feature Request] [#3273](https://github.com/microsoft/autogen/issues/3273) + - **Description**: Fixed an issue by removing the `Azure.AI.OpenAI` dependency from `AutoGen.DotnetInteractive`, simplifying the package and reducing dependencies. + +## 📄 Documentation Updates + +1. **Add Function Comparison Page Between Python AutoGen and AutoGen.Net** + - **Issue**: [.Net][Document] [#3184](https://github.com/microsoft/autogen/issues/3184) + - **Description**: Added comparative documentation for features between AutoGen and AutoGen.Net across various functionalities and platform supports. \ No newline at end of file diff --git a/dotnet/website/release_note/toc.yml b/dotnet/website/release_note/toc.yml index f8753cacc89..9c8008e705e 100644 --- a/dotnet/website/release_note/toc.yml +++ b/dotnet/website/release_note/toc.yml @@ -1,3 +1,6 @@ +- name: 0.1.0 + href: 0.1.0.md + - name: 0.0.17 href: 0.0.17.md diff --git a/dotnet/website/toc.yml b/dotnet/website/toc.yml index ad5d0e2b695..18a7eae08a8 100644 --- a/dotnet/website/toc.yml +++ b/dotnet/website/toc.yml @@ -3,13 +3,16 @@ - name: Tutorial href: tutorial/ - + - name: API Reference href: api/ - name: Release Notes href: release_note/ +- name: Comparison between Python AutoGen and AutoGen.Net + href: articles/function-comparison-page-between-python-AutoGen-and-autogen.net.md + - name: Other Languages dropdown: true items: diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index 6fefcd3ba44..eee192c4f82 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -10,7 +10,7 @@ "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. 
RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", + "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", "\n", "## Table of Contents\n", "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n", @@ -66,7 +66,7 @@ "import chromadb\n", "\n", "import autogen\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", "# Accepted file formats for that can be stored in\n", @@ -92,7 +92,7 @@ "\n", "## Construct agents for RetrieveChat\n", "\n", - "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." + "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." ] }, { @@ -129,8 +129,8 @@ } ], "source": [ - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", @@ -141,15 +141,9 @@ ")\n", "\n", "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", - "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", - "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. 
By default,\n", - "# it is set to None, which works only if the collection is already created.\n", - "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", - "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n", - "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n", - "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n", - "# no files there will be processed. However, the explicitly included urls will still be processed.\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/chromadb\n", + "# for more information on the RetrieveUserProxyAgent and ChromaVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", @@ -159,13 +153,10 @@ " \"docs_path\": [\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", - " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", " ],\n", - " \"custom_text_types\": [\"non-existent-type\"],\n", " \"chunk_token_size\": 2000,\n", " \"model\": config_list[0][\"model\"],\n", - " # \"client\": chromadb.PersistentClient(path=\"/tmp/chromadb\"), # deprecated, use \"vector_db\" instead\n", - " \"vector_db\": \"chroma\", # to use the deprecated `client` parameter, set to None and uncomment the line above\n", + " \"vector_db\": \"chroma\",\n", " \"overwrite\": False, # set to True if you want to overwrite an existing collection\n", " \"get_or_create\": True, # set to False if don't want to reuse an existing collection\n", " },\n", @@ -196,8 +187,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-08-02 06:30:11,303 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `autogen-docs`.\u001b[0m\n", - "2024-08-02 06:30:11,485 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n" + "2024-08-14 06:22:06,884 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `autogen-docs`.\u001b[0m\n" ] }, { @@ -211,6 +201,7 @@ "name": "stderr", "output_type": "stream", "text": [ + "2024-08-14 06:22:07,353 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", "Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2\n" ] }, @@ -764,23 +755,22 @@ "\n", "\n", "\n", - "--------------------------------------------------------------------------------\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", "\n", - "The author of FLAML is Chi Wang, along with several co-authors for various publications related to FLAML.\n", + "The authors of FLAML (Fast and Lightweight AutoML) as mentioned 
in the provided context are Chi Wang, Qingyun Wu, Markus Weimer, and Erkang Zhu. They are listed as the authors of the publication titled \"FLAML: A Fast and Lightweight AutoML Library\" which appeared in MLSys 2021.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "The authors of FLAML (Fast and Lightweight AutoML) as mentioned in the provided context are Chi Wang, Qingyun Wu, Markus Weimer, and Erkang Zhu. They are listed as the authors of the publication titled \"FLAML: A Fast and Lightweight AutoML Library\" which appeared in MLSys 2021.\n", "\n", "--------------------------------------------------------------------------------\n" ] - }, - { - "data": { - "text/plain": [ - "ChatResult(chat_id=None, chat_history=[{'content': 'You\\'re a retrieve augmented coding assistant. You answer user\\'s questions based on your own knowledge and the\\ncontext provided by the user.\\nIf you can\\'t answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\\nFor code generation, you must obey the following rules:\\nRule 1. You MUST NOT install any packages because all the packages needed are already installed.\\nRule 2. You must follow the formats below to write your code:\\n```language\\n# your code\\n```\\n\\nUser\\'s question is: Who is the author of FLAML?\\n\\nContext is: # Research\\n\\nFor technical details, please check our research publications.\\n\\n- [FLAML: A Fast and Lightweight AutoML Library](https://www.microsoft.com/en-us/research/publication/flaml-a-fast-and-lightweight-automl-library/). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. MLSys 2021.\\n\\n```bibtex\\n@inproceedings{wang2021flaml,\\n title={FLAML: A Fast and Lightweight AutoML Library},\\n author={Chi Wang and Qingyun Wu and Markus Weimer and Erkang Zhu},\\n year={2021},\\n booktitle={MLSys},\\n}\\n```\\n\\n- [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.\\n\\n```bibtex\\n@inproceedings{wu2021cfo,\\n title={Frugal Optimization for Cost-related Hyperparameters},\\n author={Qingyun Wu and Chi Wang and Silu Huang},\\n year={2021},\\n booktitle={AAAI},\\n}\\n```\\n\\n- [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.\\n\\n```bibtex\\n@inproceedings{wang2021blendsearch,\\n title={Economical Hyperparameter Optimization With Blended Search Strategy},\\n author={Chi Wang and Qingyun Wu and Silu Huang and Amin Saied},\\n year={2021},\\n booktitle={ICLR},\\n}\\n```\\n\\n- [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://aclanthology.org/2021.acl-long.178.pdf). Susan Xueqing Liu, Chi Wang. ACL 2021.\\n\\n```bibtex\\n@inproceedings{liuwang2021hpolm,\\n title={An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models},\\n author={Susan Xueqing Liu and Chi Wang},\\n year={2021},\\n booktitle={ACL},\\n}\\n```\\n\\n- [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. 
ICML 2021.\\n\\n```bibtex\\n@inproceedings{wu2021chacha,\\n title={ChaCha for Online AutoML},\\n author={Qingyun Wu and Chi Wang and John Langford and Paul Mineiro and Marco Rossi},\\n year={2021},\\n booktitle={ICML},\\n}\\n```\\n\\n- [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021).\\n\\n```bibtex\\n@inproceedings{wuwang2021fairautoml,\\n title={Fair AutoML},\\n author={Qingyun Wu and Chi Wang},\\n year={2021},\\n booktitle={ArXiv preprint arXiv:2111.06495},\\n}\\n```\\n\\n- [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022).\\n\\n```bibtex\\n@inproceedings{kayaliwang2022default,\\n title={Mining Robust Default Configurations for Resource-constrained AutoML},\\n author={Moe Kayali and Chi Wang},\\n year={2022},\\n booktitle={ArXiv preprint arXiv:2202.09927},\\n}\\n```\\n\\n- [Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives](https://openreview.net/forum?id=0Ij9_q567Ma). Shaokun Zhang, Feiran Jia, Chi Wang, Qingyun Wu. ICLR 2023 (notable-top-5%).\\n\\n```bibtex\\n@inproceedings{zhang2023targeted,\\n title={Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives},\\n author={Shaokun Zhang and Feiran Jia and Chi Wang and Qingyun Wu},\\n booktitle={International Conference on Learning Representations},\\n year={2023},\\n url={https://openreview.net/forum?id=0Ij9_q567Ma},\\n}\\n```\\n\\n- [Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference](https://arxiv.org/abs/2303.04673). Chi Wang, Susan Xueqing Liu, Ahmed H. Awadallah. ArXiv preprint arXiv:2303.04673 (2023).\\n\\n```bibtex\\n@inproceedings{wang2023EcoOptiGen,\\n title={Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference},\\n author={Chi Wang and Susan Xueqing Liu and Ahmed H. Awadallah},\\n year={2023},\\n booktitle={ArXiv preprint arXiv:2303.04673},\\n}\\n```\\n\\n- [An Empirical Study on Challenging Math Problem Solving with GPT-4](https://arxiv.org/abs/2306.01337). Yiran Wu, Feiran Jia, Shaokun Zhang, Hangyu Li, Erkang Zhu, Yue Wang, Yin Tat Lee, Richard Peng, Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2306.01337 (2023).\\n\\n```bibtex\\n@inproceedings{wu2023empirical,\\n title={An Empirical Study on Challenging Math Problem Solving with GPT-4},\\n author={Yiran Wu and Feiran Jia and Shaokun Zhang and Hangyu Li and Erkang Zhu and Yue Wang and Yin Tat Lee and Richard Peng and Qingyun Wu and Chi Wang},\\n year={2023},\\n booktitle={ArXiv preprint arXiv:2306.01337},\\n}\\n```\\n# Integrate - Spark\\n\\nFLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\\n\\n- Use Spark ML estimators for AutoML.\\n- Use Spark to run training in parallel spark jobs.\\n\\n## Spark ML Estimators\\n\\nFLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\\n\\n### Data\\n\\nFor Spark estimators, AutoML only consumes Spark data. 
FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\\n\\nThis utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\\n\\nThis function also accepts optional arguments `index_col` and `default_index_type`.\\n\\n- `index_col` is the column name to use as the index, default is None.\\n- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\\n\\nHere is an example code snippet for Spark Data:\\n\\n```python\\nimport pandas as pd\\nfrom flaml.automl.spark.utils import to_pandas_on_spark\\n\\n# Creating a dictionary\\ndata = {\\n \"Square_Feet\": [800, 1200, 1800, 1500, 850],\\n \"Age_Years\": [20, 15, 10, 7, 25],\\n \"Price\": [100000, 200000, 300000, 240000, 120000],\\n}\\n\\n# Creating a pandas DataFrame\\ndataframe = pd.DataFrame(data)\\nlabel = \"Price\"\\n\\n# Convert to pandas-on-spark dataframe\\npsdf = to_pandas_on_spark(dataframe)\\n```\\n\\nTo use Spark ML models you need to format your data appropriately. Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\\n\\nHere is an example of how to use it:\\n\\n```python\\nfrom pyspark.ml.feature import VectorAssembler\\n\\ncolumns = psdf.columns\\nfeature_cols = [col for col in columns if col != label]\\nfeaturizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\\npsdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\\n```\\n\\nLater in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\\n\\n### Estimators\\n\\n#### Model List\\n\\n- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\\n\\n#### Usage\\n\\nFirst, prepare your data in the required format as described in the previous section.\\n\\nBy including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. 
If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven\\'t specified them.\\n\\nHere is an example code snippet using SparkML models in AutoML:\\n\\n```python\\nimport flaml\\n\\n# prepare your data in pandas-on-spark format as we previously mentioned\\n\\nautoml = flaml.AutoML()\\nsettings = {\\n \"time_budget\": 30,\\n \"metric\": \"r2\",\\n \"estimator_list\": [\"lgbm_spark\"], # this setting is optional\\n \"task\": \"regression\",\\n}\\n\\nautoml.fit(\\n dataframe=psdf,\\n label=label,\\n **settings,\\n)\\n```\\n\\n[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb)\\n\\n## Parallel Spark Jobs\\n\\nYou can activate Spark as the parallel backend during parallel tuning in both [AutoML](/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning) and [Hyperparameter Tuning](/docs/Use-Cases/Tune-User-Defined-Function#parallel-tuning), by setting the `use_spark` to `true`. FLAML will dispatch your job to the distributed Spark backend using [`joblib-spark`](https://github.com/joblib/joblib-spark).\\n\\nPlease note that you should not set `use_spark` to `true` when applying AutoML and Tuning for Spark Data. This is because only SparkML models will be used for Spark Data in AutoML and Tuning. As SparkML models run in parallel, there is no need to distribute them with `use_spark` again.\\n\\nAll the Spark-related arguments are stated below. These arguments are available in both Hyperparameter Tuning and AutoML:\\n\\n- `use_spark`: boolean, default=False | Whether to use spark to run the training in parallel spark jobs. This can be used to accelerate training on large models and large datasets, but will incur more overhead in time and thus slow down training in some cases. GPU training is not supported yet when use_spark is True. For Spark clusters, by default, we will launch one trial per executor. However, sometimes we want to launch more trials than the number of executors (e.g., local mode). In this case, we can set the environment variable `FLAML_MAX_CONCURRENT` to override the detected `num_executors`. The final number of concurrent trials will be the minimum of `n_concurrent_trials` and `num_executors`.\\n- `n_concurrent_trials`: int, default=1 | The number of concurrent trials. When n_concurrent_trials > 1, FLAML performes parallel tuning.\\n- `force_cancel`: boolean, default=False | Whether to forcely cancel Spark jobs if the search time exceeded the time budget. 
Spark jobs include parallel tuning jobs and Spark-based model training jobs.\\n\\nAn example code snippet for using parallel Spark jobs:\\n\\n```python\\nimport flaml\\n\\nautoml_experiment = flaml.AutoML()\\nautoml_settings = {\\n \"time_budget\": 30,\\n \"metric\": \"r2\",\\n \"task\": \"regression\",\\n \"n_concurrent_trials\": 2,\\n \"use_spark\": True,\\n \"force_cancel\": True, # Activating the force_cancel option can immediately halt Spark jobs once they exceed the allocated time_budget.\\n}\\n\\nautoml.fit(\\n dataframe=dataframe,\\n label=label,\\n **automl_settings,\\n)\\n```\\n\\n[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\\n\\n', 'role': 'assistant'}, {'content': 'The author of FLAML is Chi Wang, along with several co-authors for various publications related to FLAML.', 'role': 'user'}], summary='The author of FLAML is Chi Wang, along with several co-authors for various publications related to FLAML.', cost=({'total_cost': 0.004711, 'gpt-35-turbo': {'cost': 0.004711, 'prompt_tokens': 3110, 'completion_tokens': 23, 'total_tokens': 3133}}, {'total_cost': 0}), human_input=[])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ diff --git a/notebook/agentchat_RetrieveChat_mongodb.ipynb b/notebook/agentchat_RetrieveChat_mongodb.ipynb index 0f24cf16579..09c3c44bef2 100644 --- a/notebook/agentchat_RetrieveChat_mongodb.ipynb +++ b/notebook/agentchat_RetrieveChat_mongodb.ipynb @@ -10,7 +10,7 @@ "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", + "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). 
Essentially, `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", "\n", "## Table of Contents\n", "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n", @@ -58,7 +58,7 @@ "import os\n", "\n", "import autogen\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", "# Accepted file formats for that can be stored in\n", @@ -83,7 +83,7 @@ "\n", "## Construct agents for RetrieveChat\n", "\n", - "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." + "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." ] }, { @@ -111,8 +111,8 @@ "metadata": {}, "outputs": [], "source": [ - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", @@ -123,16 +123,9 @@ ")\n", "\n", "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", - "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", - "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n", - "# it is set to None, which works only if the collection is already created.\n", - "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", - "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n", - "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n", - "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n", - "# no files there will be processed. 
However, the explicitly included urls will still be processed.\n", - "# **NOTE** Upon the first time adding in the documents, initial query may be slower due to index creation and document indexing time\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/mongodb\n", + "# for more information on the RetrieveUserProxyAgent and MongoDBAtlasVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", @@ -142,9 +135,7 @@ " \"docs_path\": [\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", - " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", " ],\n", - " \"custom_text_types\": [\"non-existent-type\"],\n", " \"chunk_token_size\": 2000,\n", " \"model\": config_list[0][\"model\"],\n", " \"vector_db\": \"mongodb\", # MongoDB Atlas database\n", diff --git a/notebook/agentchat_RetrieveChat_pgvector.ipynb b/notebook/agentchat_RetrieveChat_pgvector.ipynb index 1a8d70e2965..4d9dd44c33d 100644 --- a/notebook/agentchat_RetrieveChat_pgvector.ipynb +++ b/notebook/agentchat_RetrieveChat_pgvector.ipynb @@ -10,7 +10,7 @@ "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", + "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). 
Essentially, `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", "\n", "## Table of Contents\n", "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n", @@ -92,29 +92,13 @@ "from sentence_transformers import SentenceTransformer\n", "\n", "import autogen\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", "# Accepted file formats for that can be stored in\n", "# a vector database instance\n", "from autogen.retrieve_utils import TEXT_FORMATS\n", "\n", - "config_list = [\n", - " {\n", - " \"model\": \"Meta-Llama-3-8B-Instruct-imatrix\",\n", - " \"api_key\": \"YOUR_API_KEY\",\n", - " \"base_url\": \"http://localhost:8080/v1\",\n", - " \"api_type\": \"openai\",\n", - " },\n", - " {\"model\": \"gpt-3.5-turbo-0125\", \"api_key\": \"YOUR_API_KEY\", \"api_type\": \"openai\"},\n", - " {\n", - " \"model\": \"gpt-35-turbo\",\n", - " \"base_url\": \"...\",\n", - " \"api_type\": \"azure\",\n", - " \"api_version\": \"2023-07-01-preview\",\n", - " \"api_key\": \"...\",\n", - " },\n", - "]\n", "config_list = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", " file_location=\".\",\n", @@ -136,7 +120,7 @@ "\n", "## Construct agents for RetrieveChat\n", "\n", - "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." + "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." ] }, { @@ -173,8 +157,8 @@ } ], "source": [ - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant. You must always reply with some form of text.\",\n", " llm_config={\n", @@ -191,15 +175,9 @@ "sentence_transformer_ef = SentenceTransformer(\"all-distilroberta-v1\").encode\n", "\n", "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", - "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", - "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n", - "# it is set to None, which works only if the collection is already created.\n", - "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", - "# `chunk_token_size` is the chunk token size for the retrieve chat. 
By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n", - "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n", - "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n", - "# no files there will be processed. However, the explicitly included urls will still be processed.\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/pgvectordb\n", + "# for more information on the RetrieveUserProxyAgent and PGVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", @@ -209,9 +187,7 @@ " \"docs_path\": [\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", - " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", " ],\n", - " \"custom_text_types\": [\"non-existent-type\"],\n", " \"chunk_token_size\": 2000,\n", " \"model\": config_list[0][\"model\"],\n", " \"vector_db\": \"pgvector\", # PGVector database\n", diff --git a/notebook/agentchat_RetrieveChat_qdrant.ipynb b/notebook/agentchat_RetrieveChat_qdrant.ipynb index b5bc2f681d2..0035a8e3081 100644 --- a/notebook/agentchat_RetrieveChat_qdrant.ipynb +++ b/notebook/agentchat_RetrieveChat_qdrant.ipynb @@ -9,10 +9,10 @@ "\n", "[Qdrant](https://qdrant.tech/) is a high-performance vector search engine/database.\n", "\n", - "This notebook demonstrates the usage of `QdrantRetrieveUserProxyAgent` for RAG, based on [agentchat_RetrieveChat.ipynb](https://colab.research.google.com/github/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb).\n", + "This notebook demonstrates the usage of Qdrant for RAG, based on [agentchat_RetrieveChat.ipynb](https://colab.research.google.com/github/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb).\n", "\n", "\n", - "RetrieveChat is a conversational system for retrieve augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `QdrantRetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)).\n", + "RetrieveChat is a conversational system for retrieve augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. 
RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)).\n", "\n", "We'll demonstrate usage of RetrieveChat with Qdrant for code generation and question answering w/ human feedback.\n", "\n", @@ -74,7 +74,7 @@ "from sentence_transformers import SentenceTransformer\n", "\n", "import autogen\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", "# Accepted file formats for that can be stored in\n", @@ -125,7 +125,7 @@ "source": [ "## Construct agents for RetrieveChat\n", "\n", - "We start by initializing the `RetrieveAssistantAgent` and `QdrantRetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `QdrantRetrieveUserProxyAgent.generate_init_prompt` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant.\n", + "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.generate_init_prompt` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant.\n", "\n", "### You can find the list of all the embedding models supported by Qdrant [here](https://qdrant.github.io/fastembed/examples/Supported_Models/)." ] @@ -151,8 +151,8 @@ } ], "source": [ - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", @@ -167,18 +167,9 @@ "client = QdrantClient(\":memory:\")\n", "\n", "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", - "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", - "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n", - "# it is set to None, which works only if the collection is already created.\n", - "#\n", - "# Here we generated the documentations from FLAML's docstrings. Not needed if you just want to try this notebook but not to reproduce the\n", - "# outputs. Clone the FLAML (https://github.com/microsoft/FLAML) repo and navigate to its website folder. Pip install and run `pydoc-markdown`\n", - "# and it will generate folder `reference` under `website/docs`.\n", - "#\n", - "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", - "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", - "# We use an in-memory QdrantClient instance here. 
Not recommended for production.\n", - "# Get the installation instructions here: https://qdrant.tech/documentation/guides/installation/\n", + "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n", + "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/qdrant\n", + "# for more information on the RetrieveUserProxyAgent and QdrantVectorDB\n", "ragproxyagent = RetrieveUserProxyAgent(\n", " name=\"ragproxyagent\",\n", " human_input_mode=\"NEVER\",\n", diff --git a/notebook/agentchat_capability_long_context_handling.ipynb b/notebook/agentchat_capability_long_context_handling.ipynb deleted file mode 100644 index 0a9d715e3e5..00000000000 --- a/notebook/agentchat_capability_long_context_handling.ipynb +++ /dev/null @@ -1,687 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Handling A Long Context via `TransformChatHistory`\n", - "\n", - "
\n", - " Deprecation Notice: TransformChatHistory is no longer supported and will be removed in version 0.2.30. Please transition to using TransformMessages as the new standard method. For a detailed introduction to this method, including how to limit the number of tokens in message context history to replace TransformChatHistory, visit our guide Introduction to Transform Messages.\n", - "
\n", - "\n", - "This notebook illustrates how you can use the `TransformChatHistory` capability to give any `Conversable` agent an ability to handle a long context. \n", - "\n", - "````{=mdx}\n", - ":::info Requirements\n", - "Install `pyautogen`:\n", - "```bash\n", - "pip install pyautogen\n", - "```\n", - "\n", - "For more information, please refer to the [installation guide](/docs/installation/).\n", - ":::\n", - "````" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import autogen\n", - "from autogen.agentchat.contrib.capabilities import context_handling" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "llm_config = {\n", - " \"config_list\": [{\"model\": \"gpt-3.5-turbo\", \"api_key\": os.environ.get(\"OPENAI_API_KEY\")}],\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "````{=mdx}\n", - ":::tip\n", - "Learn more about configuring LLMs for agents [here](/docs/topics/llm_configuration).\n", - ":::\n", - "````\n", - "\n", - "To add this ability to any agent, define the capability and then use `add_to_agent`." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "plot and save a graph of x^2 from -10 to 10\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "To plot and save a graph of the function x^2 from -10 to 10, you can use the matplotlib library in Python. Here is the code:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values from -10 to 10\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Calculate corresponding y values (x^2)\n", - "y = x**2\n", - "\n", - "# Create the plot\n", - "plt.plot(x, y)\n", - "\n", - "# Add labels and title\n", - "plt.xlabel('x')\n", - "plt.ylabel('y')\n", - "plt.title('Plot of x^2')\n", - "\n", - "# Save the plot as a file\n", - "plt.savefig('x_squared_plot.png')\n", - "\n", - "# Show the plot\n", - "plt.show()\n", - "```\n", - "\n", - "This code will create a plot of the function x^2 and save it as \"x_squared_plot.png\" in the current directory. Make sure you have the matplotlib library installed before running this code.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Figure(640x480)\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 0 messages. Reduced from 3 to 3.\u001b[0m\n", - "\u001b[33mTruncated 139 tokens. Tokens reduced from 223 to 84\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480. 
\n", - "\n", - "To save the graph as an image file, you can modify the code as follows:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values from -10 to 10\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Generate y values by squaring x\n", - "y = x ** 2\n", - "\n", - "# Plot the graph\n", - "plt.plot(x, y)\n", - "plt.xlabel('x')\n", - "plt.ylabel('x^2')\n", - "plt.title('Graph of x^2')\n", - "plt.grid(True)\n", - "\n", - "# Save the graph as an image file, for example as 'graph.png'\n", - "plt.savefig('graph.png')\n", - "```\n", - "\n", - "By executing this updated code, the graph will be saved as an image file named 'graph.png' in the same directory as your Python script.\n", - "\n", - "Please let me know if you need any further assistance.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 0 messages. Reduced from 5 to 5.\u001b[0m\n", - "\u001b[33mTruncated 159 tokens. Tokens reduced from 306 to 147\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480.\n", - "\n", - "To save the graph as an image file, you can modify the code as follows:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values from -10 to 10\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Calculate y values (x^2)\n", - "y = x**2\n", - "\n", - "# Plot the graph\n", - "plt.plot(x, y)\n", - "\n", - "# Add labels and title\n", - "plt.xlabel('x')\n", - "plt.ylabel('y')\n", - "plt.title('Graph of x^2')\n", - "\n", - "# Save the graph as an image file\n", - "plt.savefig('graph.png')\n", - "\n", - "# Close the plot\n", - "plt.close()\n", - "```\n", - "\n", - "This code will save the plot as an image file named \"graph.png\" in the current directory. You can change the filename and path if needed.\n", - "\n", - "Please let me know if you need any further assistance.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 0 messages. Reduced from 7 to 7.\u001b[0m\n", - "\u001b[33mTruncated 159 tokens. Tokens reduced from 369 to 210\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. 
The plot was displayed in a figure with size 640x480.\n", - "\n", - "To save the graph as an image file, you can modify the code as follows:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Generate y values\n", - "y = x**2\n", - "\n", - "# Plot the graph\n", - "plt.plot(x, y)\n", - "\n", - "# Set labels and title\n", - "plt.xlabel('x')\n", - "plt.ylabel('y')\n", - "plt.title('Graph of x^2')\n", - "\n", - "# Save the graph as an image file\n", - "plt.savefig('graph.png')\n", - "```\n", - "\n", - "This code will save the graph as a PNG image file named 'graph.png' in the same directory where you are running the script. You can change the filename and format (e.g., 'graph.jpg') if desired.\n", - "\n", - "Please note that you will need to have the matplotlib library installed to run this code. If you don't have it installed, you can install it by running `pip install matplotlib` in your terminal or command prompt.\n", - "\n", - "Let me know if you need any further assistance!\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 0 messages. Reduced from 9 to 9.\u001b[0m\n", - "\u001b[33mTruncated 198 tokens. Tokens reduced from 471 to 273\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480.\n", - "\n", - "To save the graph as an image file, you can modify the code as follows:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values from -10 to 10\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Generate y values by squaring x\n", - "y = x**2\n", - "\n", - "# Plot the graph\n", - "plt.plot(x, y)\n", - "\n", - "# Add labels and title\n", - "plt.xlabel('x')\n", - "plt.ylabel('x^2')\n", - "plt.title('Graph of x^2')\n", - "\n", - "# Save the graph as an image file\n", - "plt.savefig('x_squared_plot.png')\n", - "\n", - "# Display the graph\n", - "plt.show()\n", - "```\n", - "\n", - "This code will save the graph as a PNG image file named \"x_squared_plot.png\" in the current working directory. You can customize the filename and file format according to your needs.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Figure(640x480)\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 1 messages. Reduced from 11 to 10.\u001b[0m\n", - "\u001b[33mTruncated 174 tokens. Tokens reduced from 501 to 327\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. 
The plot was displayed in a figure with size 640x480.\n", - "\n", - "To save the graph as an image file, you can modify the code as follows:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Generate y values\n", - "y = x ** 2\n", - "\n", - "# Create the plot\n", - "plt.plot(x, y)\n", - "plt.xlabel('x')\n", - "plt.ylabel('y')\n", - "plt.title('Graph of x^2')\n", - "\n", - "# Save the plot as an image file\n", - "plt.savefig('x_squared.png')\n", - "\n", - "plt.show()\n", - "```\n", - "\n", - "The code will save the plot as a PNG image file named \"x_squared.png\" in the current directory. You can change the filename or file extension as needed.\n", - "\n", - "Please let me know if you need any further assistance!\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Figure(640x480)\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 3 messages. Reduced from 13 to 10.\u001b[0m\n", - "\u001b[33mTruncated 227 tokens. Tokens reduced from 554 to 327\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "To save the graph as an image file, you can modify the code as follows:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# Create the plot\n", - "plt.plot(x, y)\n", - "\n", - "# Set the plot title and axes labels\n", - "plt.title(\"Plot of x^2\")\n", - "plt.xlabel(\"x\")\n", - "plt.ylabel(\"y\")\n", - "\n", - "# Save the plot as an image file\n", - "plt.savefig(\"plot.png\")\n", - "```\n", - "\n", - "This code will save the plot as a PNG file with the name \"plot.png\" in the same directory as your Python script. You can change the file name and format (e.g., \"plot.jpg\") if desired.\n", - "\n", - "Once you run this modified code, the plot will be saved as an image file. You can then use this file for further analysis or to share the plot with others.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 1 (execution failed)\n", - "Code output: \n", - "Traceback (most recent call last):\n", - " File \"\", line 4, in \n", - " plt.plot(x, y)\n", - "NameError: name 'x' is not defined\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 5 messages. Reduced from 15 to 10.\u001b[0m\n", - "\u001b[33mTruncated 258 tokens. 
Tokens reduced from 622 to 364\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "It seems that the code encountered an error because the variables `x` and `y` were not defined.\n", - "\n", - "Let's modify the code to define the variables `x` and `y` and then save the graph as an image file:\n", - "\n", - "```python\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Define the x values as an array from -10 to 10\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Define the y values as the square of x\n", - "y = x ** 2\n", - "\n", - "# Create the plot\n", - "plt.plot(x, y)\n", - "\n", - "# Set the plot title and axes labels\n", - "plt.title(\"Plot of x^2\")\n", - "plt.xlabel(\"x\")\n", - "plt.ylabel(\"y\")\n", - "\n", - "# Save the plot as an image file\n", - "plt.savefig('plot.png')\n", - "```\n", - "This code uses the `numpy` library to generate an array of x values from -10 to 10 and calculates the corresponding y values as the square of x. The plot is then saved as an image file named `plot.png`.\n", - "\n", - "Please make sure you have the `numpy` and `matplotlib` libraries installed before running this code. Let me know if you need help with that.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 7 messages. Reduced from 17 to 10.\u001b[0m\n", - "\u001b[33mTruncated 401 tokens. Tokens reduced from 765 to 364\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The code executed successfully. The graph was saved as an image file with the name \"plot.png\" in the current working directory.\n", - "\n", - "Now you can share the \"plot.png\" file or view it using an image viewer.\n", - "\n", - "Let me know if there's anything else I can help with!\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 9 messages. Reduced from 19 to 10.\u001b[0m\n", - "\u001b[33mTruncated 282 tokens. Tokens reduced from 633 to 351\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Is there anything else I can help you with?\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 11 messages. Reduced from 21 to 10.\u001b[0m\n", - "\u001b[33mTruncated 342 tokens. Tokens reduced from 634 to 292\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "If you need any further assistance, feel free to ask. 
I'm here to help!\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "ChatResult(chat_history=[{'content': 'plot and save a graph of x^2 from -10 to 10', 'role': 'assistant'}, {'content': 'To plot and save a graph of the function x^2 from -10 to 10, you can use the matplotlib library in Python. Here is the code:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Generate x values from -10 to 10\\nx = np.linspace(-10, 10, 100)\\n\\n# Calculate corresponding y values (x^2)\\ny = x**2\\n\\n# Create the plot\\nplt.plot(x, y)\\n\\n# Add labels and title\\nplt.xlabel(\\'x\\')\\nplt.ylabel(\\'y\\')\\nplt.title(\\'Plot of x^2\\')\\n\\n# Save the plot as a file\\nplt.savefig(\\'x_squared_plot.png\\')\\n\\n# Show the plot\\nplt.show()\\n```\\n\\nThis code will create a plot of the function x^2 and save it as \"x_squared_plot.png\" in the current directory. Make sure you have the matplotlib library installed before running this code.', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\nFigure(640x480)\\n', 'role': 'assistant'}, {'content': \"Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480. \\n\\nTo save the graph as an image file, you can modify the code as follows:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Generate x values from -10 to 10\\nx = np.linspace(-10, 10, 100)\\n\\n# Generate y values by squaring x\\ny = x ** 2\\n\\n# Plot the graph\\nplt.plot(x, y)\\nplt.xlabel('x')\\nplt.ylabel('x^2')\\nplt.title('Graph of x^2')\\nplt.grid(True)\\n\\n# Save the graph as an image file, for example as 'graph.png'\\nplt.savefig('graph.png')\\n```\\n\\nBy executing this updated code, the graph will be saved as an image file named 'graph.png' in the same directory as your Python script.\\n\\nPlease let me know if you need any further assistance.\", 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\n', 'role': 'assistant'}, {'content': 'Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480.\\n\\nTo save the graph as an image file, you can modify the code as follows:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Generate x values from -10 to 10\\nx = np.linspace(-10, 10, 100)\\n\\n# Calculate y values (x^2)\\ny = x**2\\n\\n# Plot the graph\\nplt.plot(x, y)\\n\\n# Add labels and title\\nplt.xlabel(\\'x\\')\\nplt.ylabel(\\'y\\')\\nplt.title(\\'Graph of x^2\\')\\n\\n# Save the graph as an image file\\nplt.savefig(\\'graph.png\\')\\n\\n# Close the plot\\nplt.close()\\n```\\n\\nThis code will save the plot as an image file named \"graph.png\" in the current directory. You can change the filename and path if needed.\\n\\nPlease let me know if you need any further assistance.', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\n', 'role': 'assistant'}, {'content': \"Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. 
The plot was displayed in a figure with size 640x480.\\n\\nTo save the graph as an image file, you can modify the code as follows:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Generate x values\\nx = np.linspace(-10, 10, 100)\\n\\n# Generate y values\\ny = x**2\\n\\n# Plot the graph\\nplt.plot(x, y)\\n\\n# Set labels and title\\nplt.xlabel('x')\\nplt.ylabel('y')\\nplt.title('Graph of x^2')\\n\\n# Save the graph as an image file\\nplt.savefig('graph.png')\\n```\\n\\nThis code will save the graph as a PNG image file named 'graph.png' in the same directory where you are running the script. You can change the filename and format (e.g., 'graph.jpg') if desired.\\n\\nPlease note that you will need to have the matplotlib library installed to run this code. If you don't have it installed, you can install it by running `pip install matplotlib` in your terminal or command prompt.\\n\\nLet me know if you need any further assistance!\", 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\n', 'role': 'assistant'}, {'content': 'Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480.\\n\\nTo save the graph as an image file, you can modify the code as follows:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Generate x values from -10 to 10\\nx = np.linspace(-10, 10, 100)\\n\\n# Generate y values by squaring x\\ny = x**2\\n\\n# Plot the graph\\nplt.plot(x, y)\\n\\n# Add labels and title\\nplt.xlabel(\\'x\\')\\nplt.ylabel(\\'x^2\\')\\nplt.title(\\'Graph of x^2\\')\\n\\n# Save the graph as an image file\\nplt.savefig(\\'x_squared_plot.png\\')\\n\\n# Display the graph\\nplt.show()\\n```\\n\\nThis code will save the graph as a PNG image file named \"x_squared_plot.png\" in the current working directory. You can customize the filename and file format according to your needs.', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\nFigure(640x480)\\n', 'role': 'assistant'}, {'content': 'Great! The code executed successfully and generated a plot of the function x^2 from -10 to 10. The plot was displayed in a figure with size 640x480.\\n\\nTo save the graph as an image file, you can modify the code as follows:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Generate x values\\nx = np.linspace(-10, 10, 100)\\n\\n# Generate y values\\ny = x ** 2\\n\\n# Create the plot\\nplt.plot(x, y)\\nplt.xlabel(\\'x\\')\\nplt.ylabel(\\'y\\')\\nplt.title(\\'Graph of x^2\\')\\n\\n# Save the plot as an image file\\nplt.savefig(\\'x_squared.png\\')\\n\\nplt.show()\\n```\\n\\nThe code will save the plot as a PNG image file named \"x_squared.png\" in the current directory. You can change the filename or file extension as needed.\\n\\nPlease let me know if you need any further assistance!', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\nFigure(640x480)\\n', 'role': 'assistant'}, {'content': 'To save the graph as an image file, you can modify the code as follows:\\n\\n```python\\nimport matplotlib.pyplot as plt\\n\\n# Create the plot\\nplt.plot(x, y)\\n\\n# Set the plot title and axes labels\\nplt.title(\"Plot of x^2\")\\nplt.xlabel(\"x\")\\nplt.ylabel(\"y\")\\n\\n# Save the plot as an image file\\nplt.savefig(\"plot.png\")\\n```\\n\\nThis code will save the plot as a PNG file with the name \"plot.png\" in the same directory as your Python script. 
You can change the file name and format (e.g., \"plot.jpg\") if desired.\\n\\nOnce you run this modified code, the plot will be saved as an image file. You can then use this file for further analysis or to share the plot with others.', 'role': 'user'}, {'content': 'exitcode: 1 (execution failed)\\nCode output: \\nTraceback (most recent call last):\\n File \"\", line 4, in \\n plt.plot(x, y)\\nNameError: name \\'x\\' is not defined\\n', 'role': 'assistant'}, {'content': 'It seems that the code encountered an error because the variables `x` and `y` were not defined.\\n\\nLet\\'s modify the code to define the variables `x` and `y` and then save the graph as an image file:\\n\\n```python\\nimport matplotlib.pyplot as plt\\nimport numpy as np\\n\\n# Define the x values as an array from -10 to 10\\nx = np.linspace(-10, 10, 100)\\n\\n# Define the y values as the square of x\\ny = x ** 2\\n\\n# Create the plot\\nplt.plot(x, y)\\n\\n# Set the plot title and axes labels\\nplt.title(\"Plot of x^2\")\\nplt.xlabel(\"x\")\\nplt.ylabel(\"y\")\\n\\n# Save the plot as an image file\\nplt.savefig(\\'plot.png\\')\\n```\\nThis code uses the `numpy` library to generate an array of x values from -10 to 10 and calculates the corresponding y values as the square of x. The plot is then saved as an image file named `plot.png`.\\n\\nPlease make sure you have the `numpy` and `matplotlib` libraries installed before running this code. Let me know if you need help with that.', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\n', 'role': 'assistant'}, {'content': 'Great! The code executed successfully. The graph was saved as an image file with the name \"plot.png\" in the current working directory.\\n\\nNow you can share the \"plot.png\" file or view it using an image viewer.\\n\\nLet me know if there\\'s anything else I can help with!', 'role': 'user'}, {'content': '', 'role': 'assistant'}, {'content': 'Is there anything else I can help you with?', 'role': 'user'}, {'content': '', 'role': 'assistant'}, {'content': \"If you need any further assistance, feel free to ask. I'm here to help!\", 'role': 'user'}], summary=\"If you need any further assistance, feel free to ask. 
I'm here to help!\", cost=({'total_cost': 0.015855, 'gpt-3.5-turbo-0613': {'cost': 0.015855, 'prompt_tokens': 8242, 'completion_tokens': 1746, 'total_tokens': 9988}}, {'total_cost': 0.0147465, 'gpt-3.5-turbo-0613': {'cost': 0.0147465, 'prompt_tokens': 7755, 'completion_tokens': 1557, 'total_tokens': 9312}}), human_input=[])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "assistant = autogen.AssistantAgent(\n", - " \"assistant\",\n", - " llm_config=llm_config,\n", - ")\n", - "\n", - "\n", - "# Instantiate the capability to manage chat history\n", - "manage_chat_history = context_handling.TransformChatHistory(max_tokens_per_message=50, max_messages=10, max_tokens=1000)\n", - "# Add the capability to the assistant\n", - "manage_chat_history.add_to_agent(assistant)\n", - "\n", - "user_proxy = autogen.UserProxyAgent(\n", - " \"user_proxy\",\n", - " human_input_mode=\"NEVER\",\n", - " is_termination_msg=lambda x: \"TERMINATE\" in x.get(\"content\", \"\"),\n", - " code_execution_config={\n", - " \"work_dir\": \"coding\",\n", - " \"use_docker\": False,\n", - " },\n", - " max_consecutive_auto_reply=10,\n", - ")\n", - "\n", - "user_proxy.initiate_chat(assistant, message=\"plot and save a graph of x^2 from -10 to 10\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Why is this important?\n", - "This capability is especially useful if you expect the agent histories to become exceptionally large and exceed the context length offered by your LLM.\n", - "For example, in the example below, we will define two agents -- one without this ability and one with this ability.\n", - "\n", - "The agent with this ability will be able to handle longer chat history without crashing." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "plot and save a graph of x^2 from -10 to 10\n", - "\n", - "--------------------------------------------------------------------------------\n", - "Encountered an error with the base assistant\n", - "Error code: 400 - {'error': {'message': \"This model's maximum context length is 4097 tokens. However, your messages resulted in 1009487 tokens. Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}\n", - "\n", - "\n", - "\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "plot and save a graph of x^2 from -10 to 10\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 1991 messages. Reduced from 2001 to 10.\u001b[0m\n", - "\u001b[33mTruncated 1000800 tokens. 
Tokens reduced from 1001015 to 215\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Here's the Python code to plot and save a graph of x^2 from -10 to 10:\n", - "\n", - "```python\n", - "# filename: plot_graph.py\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# Generate x values from -10 to 10\n", - "x = np.linspace(-10, 10, 100)\n", - "\n", - "# Calculate y values as x^2\n", - "y = x**2\n", - "\n", - "# Create plot\n", - "plt.plot(x, y)\n", - "\n", - "# Add labels and title\n", - "plt.xlabel('x')\n", - "plt.ylabel('y')\n", - "plt.title('Graph of y = x^2')\n", - "\n", - "# Save the plot as a PNG image\n", - "plt.savefig('graph.png')\n", - "\n", - "# Show the plot\n", - "plt.show()\n", - "```\n", - "\n", - "To execute this code, save it to a file called `plot_graph.py` and run it using Python. This will generate a file called `graph.png` in the same directory, which will contain the graph of x^2 from -10 to 10.\n", - "\n", - "Note: Make sure you have the matplotlib library installed. You can install it by running `pip install matplotlib` in your terminal or command prompt.\n", - "\n", - "Let me know if you need any further assistance!\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Figure(640x480)\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 1993 messages. Reduced from 2003 to 10.\u001b[0m\n", - "\u001b[33mTruncated 997232 tokens. Tokens reduced from 997466 to 234\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "Great! The graph of x^2 from -10 to 10 has been plotted and saved successfully. You can find the saved graph as an image file on your computer. \n", - "\n", - "Is there anything else I can help you with?\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mTruncated 1995 messages. Reduced from 2005 to 10.\u001b[0m\n", - "\u001b[33mTruncated 997096 tokens. 
Tokens reduced from 997326 to 230\u001b[0m\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "TERMINATE\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "assistant_base = autogen.AssistantAgent(\n", - " \"assistant\",\n", - " llm_config=llm_config,\n", - ")\n", - "\n", - "assistant_with_context_handling = autogen.AssistantAgent(\n", - " \"assistant\",\n", - " llm_config=llm_config,\n", - ")\n", - "# suppose this capability is not available\n", - "manage_chat_history = context_handling.TransformChatHistory(max_tokens_per_message=50, max_messages=10, max_tokens=1000)\n", - "manage_chat_history.add_to_agent(assistant_with_context_handling)\n", - "\n", - "user_proxy = autogen.UserProxyAgent(\n", - " \"user_proxy\",\n", - " human_input_mode=\"NEVER\",\n", - " is_termination_msg=lambda x: \"TERMINATE\" in x.get(\"content\", \"\"),\n", - " code_execution_config={\n", - " \"work_dir\": \"coding\",\n", - " \"use_docker\": False,\n", - " },\n", - " max_consecutive_auto_reply=2,\n", - ")\n", - "\n", - "# suppose the chat history is large\n", - "# Create a very long chat history that is bound to cause a crash\n", - "# for gpt 3.5\n", - "long_history = []\n", - "for i in range(1000):\n", - " # define a fake, very long message\n", - " assitant_msg = {\"role\": \"assistant\", \"content\": \"test \" * 1000}\n", - " user_msg = {\"role\": \"user\", \"content\": \"\"}\n", - "\n", - " assistant_base.send(assitant_msg, user_proxy, request_reply=False, silent=True)\n", - " assistant_with_context_handling.send(assitant_msg, user_proxy, request_reply=False, silent=True)\n", - " user_proxy.send(user_msg, assistant_base, request_reply=False, silent=True)\n", - " user_proxy.send(user_msg, assistant_with_context_handling, request_reply=False, silent=True)\n", - "\n", - "try:\n", - " user_proxy.initiate_chat(assistant_base, message=\"plot and save a graph of x^2 from -10 to 10\", clear_history=False)\n", - "except Exception as e:\n", - " print(\"Encountered an error with the base assistant\")\n", - " print(e)\n", - " print(\"\\n\\n\")\n", - "\n", - "try:\n", - " user_proxy.initiate_chat(\n", - " assistant_with_context_handling, message=\"plot and save a graph of x^2 from -10 to 10\", clear_history=False\n", - " )\n", - "except Exception as e:\n", - " print(e)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebook/agentchat_compression.ipynb b/notebook/agentchat_compression.ipynb deleted file mode 100644 index d7f3a7204db..00000000000 --- a/notebook/agentchat_compression.ipynb +++ /dev/null @@ -1,876 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Conversations with Chat History Compression Enabled\n", - "\n", - "
\n", - " Deprecation Notice: CompressibleAgent has been deprecated and will no longer be available as of version 0.2.30. Please transition to using TransformMessages, which is now the recommended approach. For a detailed guide on implementing this new standard, refer to our user guide on Compressing Text with LLMLingua. This guide provides examples for effectively utilizing LLMLingua transform as a replacement for CompressibleAgent.\n", - "
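The notice above recommends `TransformMessages` with LLMLingua-based text compression in place of `CompressibleAgent`. A minimal sketch of that setup, assuming the `TextMessageCompressor` transform and `LLMLingua` wrapper described in the Compressing Text with LLMLingua guide (these class names and module paths are taken from that guide and may differ across versions; LLMLingua also needs its own package installed):

```python
import autogen

# Assumed replacement API from the LLMLingua guide; verify against your
# installed pyautogen version. LLMLingua requires the `llmlingua` package.
from autogen.agentchat.contrib.capabilities import transform_messages, transforms
from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua

config_list = autogen.config_list_from_json("OAI_CONFIG_LIST")

assistant = autogen.AssistantAgent("assistant", llm_config={"config_list": config_list})

# Compress long history messages with LLMLingua instead of asking the same
# LLM to summarize them, which is what CompressibleAgent used to do.
compressor = transforms.TextMessageCompressor(text_compressor=LLMLingua())
context_handling = transform_messages.TransformMessages(transforms=[compressor])
context_handling.add_to_agent(assistant)
```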
\n", - "\n", - "AutoGen offers conversable agents powered by LLM, tools, or humans, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participance through multi-agent conversation. Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", - "\n", - "In this notebook, we demonstrate how to enable compression of history messages using the `CompressibleAgent`. While this agent retains all the default functionalities of the `AssistantAgent`, it also provides the added feature of compression when activated through the `compress_config` setting.\n", - "\n", - "Different compression modes are supported:\n", - "\n", - "1. `compress_config=False` (Default): `CompressibleAgent` is equivalent to `AssistantAgent`.\n", - "2. `compress_config=True` or `compress_config={\"mode\": \"TERMINATE\"}`: no compression will be performed. However, we will count token usage before sending requests to the OpenAI model. The conversation will be terminated directly if the total token usage exceeds the maximum token usage allowed by the model (to avoid the token limit error from OpenAI API).\n", - "3. `compress_config={\"mode\": \"COMPRESS\", \"trigger_count\": , \"leave_last_n\": }`: compression is enabled.\n", - "\n", - " ```python\n", - " # default compress_config\n", - " compress_config = {\n", - " \"mode\": \"COMPRESS\",\n", - " \"compress_function\": None,\n", - " \"trigger_count\": 0.7, # default to 0.7, or your pre-set number\n", - " \"broadcast\": True, # the compressed with be broadcast to sender. This will not be used in groupchat.\n", - "\n", - " # the following settings are for this mode only\n", - " \"leave_last_n\": 2, # leave the last n messages in the history to avoid compression\n", - " \"verbose\": False, # if True, print out the content to be compressed and the compressed content\n", - " }\n", - " ```\n", - "\n", - " Currently, our compression logic is as follows:\n", - " 1. We will always leave the first user message (as well as system prompts) and compress the rest of the history messages.\n", - " 2. You can choose to not compress the last n messages in the history with \"leave_last_n\".\n", - " 2. The summary is performed on a per-message basis, with the role of the messages (See compressed content in the example below).\n", - "\n", - "4. `compress_config={\"mode\": \"CUSTOMIZED\", \"compress_function\": }t`: the `compress_function` function will be called on trigger count. The function should accept a list of messages as input and return a tuple of (is_success: bool, compressed_messages: List[Dict]). The whole message history (except system prompt) will be passed.\n", - "\n", - "\n", - "By adjusting `trigger_count`, you can decide when to compress the history messages based on existing tokens. If this is a float number between 0 and 1, it is interpreted as a ratio of max tokens allowed by the model. For example, the AssistantAgent uses gpt-4 with max tokens 8192, the trigger_count = 0.7 * 8192 = 5734.4 -> 5734. Do not set `trigger_count` to the max tokens allowed by the model, since the same LLM is employed for compression and it needs tokens to generate the compressed content. \n", - "\n", - "\n", - "\n", - "## Limitations\n", - "- For now, the compression feature **is not well-supported for groupchat**. If you initialize a `CompressibleAgent` in a groupchat with compression, the compressed cannot be broadcast to all other agents in the groupchat. 
If you use this feature in groupchat, extra cost will be incurred since compression will be performed on at per-agent basis.\n", - "- We do not support async compression for now.\n", - "\n", - "## Requirements\n", - "\n", - "````{=mdx}\n", - ":::info Requirements\n", - "Install `pyautogen`:\n", - "```bash\n", - "pip install pyautogen\n", - "```\n", - "\n", - "For more information, please refer to the [installation guide](/docs/installation/).\n", - ":::\n", - "````" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set your API Endpoint\n", - "\n", - "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# define functions according to the function description\n", - "from IPython import get_ipython\n", - "\n", - "import autogen\n", - "from autogen.agentchat.contrib.compressible_agent import CompressibleAgent\n", - "from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent\n", - "\n", - "config_list = autogen.config_list_from_json(\n", - " \"OAI_CONFIG_LIST\",\n", - " filter_dict={\n", - " \"model\": [\"gpt-4-1106-preview\"],\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "````{=mdx}\n", - ":::tip\n", - "Learn more about configuring LLMs for agents [here](/docs/topics/llm_configuration).\n", - ":::\n", - "````" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example 1\n", - "This example is from [agentchat_MathChat.ipynb](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_MathChat.ipynb). Compression with code execution.\n", - "\n", - "You must set the `model` field in `llm_config`, as it will be used to calculate the token usage.\n", - "\n", - "Note: we set `trigger_count=600`, and `leave_last_n=2`. In this example, we set a low trigger_count to demonstrate the compression feature. \n", - "The token count after compression is still bigger than trigger count, mainly because the trigger count is low an the first and last 2 messages are not compressed. Thus, the compression is performed at each turn. In practice, you want to adjust the trigger_count to a bigger number and properly set the `leave_last_n` to avoid compression at each turn. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mmathproxyagent\u001b[0m (to assistant):\n", - "\n", - "Let's use Python to solve a math problem.\n", - "\n", - "Query requirements:\n", - "You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.\n", - "You can use packages like sympy to help you.\n", - "You must follow the formats below to write your code:\n", - "```python\n", - "# your code\n", - "```\n", - "\n", - "First state the key idea to solve the problem. You may choose from three ways to solve the problem:\n", - "Case 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed.\n", - "Case 2: If the problem is mostly reasoning, you can solve it by yourself directly.\n", - "Case 3: If the problem cannot be handled in the above two ways, please follow this process:\n", - "1. 
Solve the problem step by step (do not over-divide the steps).\n", - "2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).\n", - "3. Wait for me to give the results.\n", - "4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\n", - "\n", - "After all the queries are run and you get the answer, put the answer in \\boxed{}.\n", - "\n", - "Problem:\n", - "Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to mathproxyagent):\n", - "\n", - "Key Idea:\n", - "To solve this inequality $(2x + 10)(x + 3) < (3x + 9)(x + 8)$, we'll first expand both sides of the inequality, then collect all terms on one side to form a quadratic inequality. After simplifying, we will factor the quadratic expression if possible. Then, we'll determine the critical points of the inequality by setting the factors to zero. Finally, we'll use a sign chart or test values within intervals determined by the critical points to find the intervals where the original inequality is satisfied.\n", - "\n", - "Case 1 applies here, so I will write a Python program using sympy that simplifies the inequality, factors the resulting expression, solves for the critical points, and then determines the intervals where the inequality holds true.\n", - "\n", - "```python\n", - "from sympy import symbols, solve, simplify\n", - "\n", - "# Define the variable\n", - "x = symbols('x')\n", - "\n", - "# Given inequality\n", - "lhs = (2*x + 10)*(x + 3)\n", - "rhs = (3*x + 9)*(x + 8)\n", - "\n", - "# Move all terms to one side of the inequality\n", - "inequality = simplify(lhs - rhs) < 0\n", - "\n", - "# Solve the inequality\n", - "solutions = solve(inequality, x)\n", - "\n", - "# Print the solution using interval notation\n", - "print(solutions)\n", - "```\n", - "\n", - "Running this code will provide us with the solution in interval notation. We'll express the final answer in the requested format afterward.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mmathproxyagent\u001b[0m (to assistant):\n", - "\n", - "((-oo < x) & (x < -14)) | ((-3 < x) & (x < oo))\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Warning: Compression skipped at trigger count threshold. The first msg and last 2 msgs will not be compressed. current msg count: 3. 
Consider raising trigger_count.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33massistant\u001b[0m (to mathproxyagent):\n", - "\n", - "The solution obtained from running the Python code suggests that the values of $x$ that satisfy the inequality $(2x + 10)(x + 3) < (3x + 9)(x + 8)$ are in the intervals $(-\\infty, -14)$ and $(-3, \\infty)$.\n", - "\n", - "Therefore, the answer in interval notation is:\n", - "\n", - "$$\n", - "\\boxed{(-\\infty, -14) \\cup (-3, \\infty)}\n", - "$$\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "ChatResult(chat_id=None, chat_history=[{'content': \"Let's use Python to solve a math problem.\\n\\nQuery requirements:\\nYou should always use the 'print' function for the output and use fractions/radical forms instead of decimals.\\nYou can use packages like sympy to help you.\\nYou must follow the formats below to write your code:\\n```python\\n# your code\\n```\\n\\nFirst state the key idea to solve the problem. You may choose from three ways to solve the problem:\\nCase 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed.\\nCase 2: If the problem is mostly reasoning, you can solve it by yourself directly.\\nCase 3: If the problem cannot be handled in the above two ways, please follow this process:\\n1. Solve the problem step by step (do not over-divide the steps).\\n2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).\\n3. Wait for me to give the results.\\n4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\\n\\nAfter all the queries are run and you get the answer, put the answer in \\\\boxed{}.\\n\\nProblem:\\nFind all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.\", 'role': 'assistant'}, {'content': \"Key Idea:\\nTo solve this inequality $(2x + 10)(x + 3) < (3x + 9)(x + 8)$, we'll first expand both sides of the inequality, then collect all terms on one side to form a quadratic inequality. After simplifying, we will factor the quadratic expression if possible. Then, we'll determine the critical points of the inequality by setting the factors to zero. Finally, we'll use a sign chart or test values within intervals determined by the critical points to find the intervals where the original inequality is satisfied.\\n\\nCase 1 applies here, so I will write a Python program using sympy that simplifies the inequality, factors the resulting expression, solves for the critical points, and then determines the intervals where the inequality holds true.\\n\\n```python\\nfrom sympy import symbols, solve, simplify\\n\\n# Define the variable\\nx = symbols('x')\\n\\n# Given inequality\\nlhs = (2*x + 10)*(x + 3)\\nrhs = (3*x + 9)*(x + 8)\\n\\n# Move all terms to one side of the inequality\\ninequality = simplify(lhs - rhs) < 0\\n\\n# Solve the inequality\\nsolutions = solve(inequality, x)\\n\\n# Print the solution using interval notation\\nprint(solutions)\\n```\\n\\nRunning this code will provide us with the solution in interval notation. 
We'll express the final answer in the requested format afterward.\", 'role': 'user'}, {'content': '((-oo < x) & (x < -14)) | ((-3 < x) & (x < oo))', 'role': 'assistant'}, {'content': 'The solution obtained from running the Python code suggests that the values of $x$ that satisfy the inequality $(2x + 10)(x + 3) < (3x + 9)(x + 8)$ are in the intervals $(-\\\\infty, -14)$ and $(-3, \\\\infty)$.\\n\\nTherefore, the answer in interval notation is:\\n\\n$$\\n\\\\boxed{(-\\\\infty, -14) \\\\cup (-3, \\\\infty)}\\n$$', 'role': 'user'}], summary='The solution obtained from running the Python code suggests that the values of $x$ that satisfy the inequality $(2x + 10)(x + 3) < (3x + 9)(x + 8)$ are in the intervals $(-\\\\infty, -14)$ and $(-3, \\\\infty)$.\\n\\nTherefore, the answer in interval notation is:\\n\\n$$\\n\\\\boxed{(-\\\\infty, -14) \\\\cup (-3, \\\\infty)}\\n$$', cost=({'total_cost': 0.052199999999999996, 'gpt-4': {'cost': 0.052199999999999996, 'prompt_tokens': 954, 'completion_tokens': 393, 'total_tokens': 1347}}, {'total_cost': 0.052199999999999996, 'gpt-4': {'cost': 0.052199999999999996, 'prompt_tokens': 954, 'completion_tokens': 393, 'total_tokens': 1347}}), human_input=[])" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1. replace AssistantAgent with CompressibleAgent\n", - "assistant = CompressibleAgent(\n", - " name=\"assistant\",\n", - " system_message=\"You are a helpful assistant.\",\n", - " llm_config={\n", - " \"timeout\": 600,\n", - " \"cache_seed\": 42,\n", - " \"config_list\": config_list,\n", - " \"model\": \"gpt-4-1106-preview\", # you must set the model field in llm_config, as it will be used to calculate the token usage.\n", - " },\n", - " compress_config={\n", - " \"mode\": \"COMPRESS\",\n", - " \"trigger_count\": 600, # set this to a large number for less frequent compression\n", - " \"verbose\": True, # to allow printing of compression information: context before and after compression\n", - " \"leave_last_n\": 2,\n", - " },\n", - ")\n", - "\n", - "# 2. create the MathUserProxyAgent instance named \"mathproxyagent\"\n", - "mathproxyagent = MathUserProxyAgent(\n", - " name=\"mathproxyagent\",\n", - " human_input_mode=\"NEVER\",\n", - " code_execution_config={\n", - " \"use_docker\": False\n", - " }, # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n", - " max_consecutive_auto_reply=5,\n", - ")\n", - "math_problem = (\n", - " \"Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.\"\n", - ")\n", - "mathproxyagent.initiate_chat(assistant, message=mathproxyagent.message_generator, problem=math_problem)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example 2\n", - "This example is from [agentchat_function_call.ipynb](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_function_call.ipynb). Compression with function calls. " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", - "\n", - "Draw two agents chatting with each other with an example dialog. 
Don't add plt.show().\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", - "\n", - "\u001b[32m***** Suggested function Call: python *****\u001b[0m\n", - "Arguments: \n", - "{\n", - " \"cell\": \"import matplotlib.pyplot as plt\\nimport numpy as np\\n\\nfig, ax = plt.subplots()\\n\\n# Define the agents as circles\\nagent1 = plt.Circle((0.4, 0.5), 0.1, color='blue')\\nagent2 = plt.Circle((0.6, 0.5), 0.1, color='red')\\n\\n# Draw the agents\\nax.add_artist(agent1)\\nax.add_artist(agent2)\\n\\n# Example dialog boxes\\nplt.text(0.28, 0.6, \\\"Hello!\\\", fontsize=12, bbox=dict(facecolor='white', alpha=0.5))\\nplt.text(0.58, 0.6, \\\"Hi there!\\\", fontsize=12, bbox=dict(facecolor='white', alpha=0.5))\\n\\n# Set the limits and remove axes\\nax.set_xlim(0, 1)\\nax.set_ylim(0, 1)\\nax.axis('off')\\n\"\n", - "}\n", - "\u001b[32m*******************************************\u001b[0m\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[35m\n", - ">>>>>>>> EXECUTING FUNCTION python...\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "(0.0, 1.0, 0.0, 1.0)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAGFCAYAAABg2vAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdR0lEQVR4nO3de3SU9Z3H8c8zMyEkIjcN18YGAkGE2iO4si1QoSpC16VILSq6HjheIko99ZYK1kUUQZG2dk2VrOi6eNl1o1h6WkWqBRQQXStdFQ0GMRQpYkAS0IEwycz+8esQQhBzmzxP8n2/zomcTJLJb5xn8rznufweL5FIJAQAAMwK+T0AAADgL2IAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMI4YAADAOGIAAADjiAEAAIwjBgAAMC7i9wDQdlVWVioajfo9jFaTmZmpLl26+D2Mds3aMpVqLLNoKGIATVJZWanCwkLFYjG/h9Jq0tLSNHPmTP64pojFZSrVWGbRUMQAmiQajSoWi2ny5MnKysryezgpV15ermXLlikajfKHNUWsLVOpxjKLxiAG0CxZWVnq3bu338NAO8IyBbQ+DiAEAMA4YgAAAOOIAQAAjCMG0OZMmzZNOTk5dW7zPE933nmnL+NBcOXk5GjatGlN/vlp06apU6dOLTcgIKCIAaTM448/Ls/z9NZbbx3z62PGjNHQoUNbeVT1rV69Wp7nafXq1X4PBceRquUpGo3qzjvvbHPPPwGMlsTZBADarc2bNysUOv57nmg0qrlz50pyQQFYxJYBAO1Wenq60tLS/B7GcSUSCR04cMDvYcA4YgCB8uSTT2r48OHKyMhQ9+7ddckll2j79u1Nuq+NGzdqwoQJ6ty5szp16qRzzjlHGzZsaOERI8i+7piBsrKywxMczZ07V57nHXPz+44dOzRp0iR16tRJWVlZuuWWW1RTU1Pne+LxuB544AENGTJEHTt2VM+ePZWfn6+9e/fWG9MFF1ygl156SWeeeaYyMjJUVFQkSaqoqNBPf/pTZWdnKz09XQMGDNB9992neDze/P8ZwHGwmwApV1lZqd27d9e7/ehpZ++55x7dcccdmjJliq666iqVl5frwQcf1Pe+9z1t3LhRXbt2bfDv3LRpk0aPHq3OnTuroKBAaWlpKioq0pgxY7RmzRqNGDGiuQ8LPmno8tQQWVlZevjhhzVjxgxdeOGFmjx5siTp9NNPP/w9NTU1Ov/88zVixAgtWrRIL7/8sn7xi18oNzdXM2bMOPx9+fn5evzxxzV9+nTdcMMN+vjjj1VYWKiNGzdq3bp1dbZQbN68WZdeeq
... [base64-encoded PNG image data omitted: matplotlib figure output, 640x480] ...
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", - "\n", - "\u001b[32m***** Response from calling function \"python\" *****\u001b[0m\n", - "(0.0, 1.0, 0.0, 1.0)\n", - "\u001b[32m***************************************************\u001b[0m\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", - "\n", - "The two agents have been drawn, each represented as a circle, and an example of their dialogue is displayed above them. Since `plt.show()` was not to be included, the plot is not displayed here, but the agents along with their dialogue would appear within the figure's coordinate system, which extends from 0 to 1 on both the x and y axes.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", - "\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", - "\n", - "TERMINATE\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "ChatResult(chat_id=None, chat_history=[{'content': \"Draw two agents chatting with each other with an example dialog. Don't add plt.show().\", 'role': 'assistant'}, {'function_call': {'arguments': '{\\n \"cell\": \"import matplotlib.pyplot as plt\\\\nimport numpy as np\\\\n\\\\nfig, ax = plt.subplots()\\\\n\\\\n# Define the agents as circles\\\\nagent1 = plt.Circle((0.4, 0.5), 0.1, color=\\'blue\\')\\\\nagent2 = plt.Circle((0.6, 0.5), 0.1, color=\\'red\\')\\\\n\\\\n# Draw the agents\\\\nax.add_artist(agent1)\\\\nax.add_artist(agent2)\\\\n\\\\n# Example dialog boxes\\\\nplt.text(0.28, 0.6, \\\\\"Hello!\\\\\", fontsize=12, bbox=dict(facecolor=\\'white\\', alpha=0.5))\\\\nplt.text(0.58, 0.6, \\\\\"Hi there!\\\\\", fontsize=12, bbox=dict(facecolor=\\'white\\', alpha=0.5))\\\\n\\\\n# Set the limits and remove axes\\\\nax.set_xlim(0, 1)\\\\nax.set_ylim(0, 1)\\\\nax.axis(\\'off\\')\\\\n\"\\n}', 'name': 'python'}, 'content': None, 'role': 'assistant'}, {'content': '(0.0, 1.0, 0.0, 1.0)', 'name': 'python', 'role': 'function'}, {'content': \"The two agents have been drawn, each represented as a circle, and an example of their dialogue is displayed above them. 
Since `plt.show()` was not to be included, the plot is not displayed here, but the agents along with their dialogue would appear within the figure's coordinate system, which extends from 0 to 1 on both the x and y axes.\", 'role': 'user'}, {'content': '', 'role': 'assistant'}, {'content': 'TERMINATE', 'role': 'user'}], summary='', cost=({'total_cost': 0.04767, 'gpt-4': {'cost': 0.04767, 'prompt_tokens': 973, 'completion_tokens': 308, 'total_tokens': 1281}}, {'total_cost': 0.04767, 'gpt-4': {'cost': 0.04767, 'prompt_tokens': 973, 'completion_tokens': 308, 'total_tokens': 1281}}), human_input=[])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "llm_config = {\n", - " \"model\": \"gpt-4-1106-preview\",\n", - " \"functions\": [\n", - " {\n", - " \"name\": \"python\",\n", - " \"description\": \"run cell in ipython and return the execution result.\",\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"cell\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"Valid Python cell to execute.\",\n", - " }\n", - " },\n", - " \"required\": [\"cell\"],\n", - " },\n", - " },\n", - " {\n", - " \"name\": \"sh\",\n", - " \"description\": \"run a shell script and return the execution result.\",\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"script\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"Valid shell script to execute.\",\n", - " }\n", - " },\n", - " \"required\": [\"script\"],\n", - " },\n", - " },\n", - " ],\n", - " \"config_list\": config_list,\n", - " \"timeout\": 120,\n", - "}\n", - "\n", - "chatbot = CompressibleAgent(\n", - " name=\"chatbot\",\n", - " system_message=\"For coding tasks, only use the functions you have been provided with. Reply TERMINATE when the task is done.\",\n", - " llm_config=llm_config,\n", - " compress_config={\n", - " \"mode\": \"COMPRESS\",\n", - " \"trigger_count\": 600, # set this to a large number for less frequent compression\n", - " \"verbose\": True, # set this to False to suppress the compression log\n", - " \"leave_last_n\": 2,\n", - " },\n", - ")\n", - "\n", - "# create a UserProxyAgent instance named \"user_proxy\"\n", - "user_proxy = autogen.UserProxyAgent(\n", - " name=\"user_proxy\",\n", - " is_termination_msg=lambda x: x.get(\"content\", \"\") and x.get(\"content\", \"\").rstrip().endswith(\"TERMINATE\"),\n", - " human_input_mode=\"NEVER\",\n", - " max_consecutive_auto_reply=10,\n", - " code_execution_config={\n", - " \"work_dir\": \"coding\",\n", - " \"use_docker\": False,\n", - " }, # Please set use_docker=True if docker is available to run the generated code. 
Using docker is safer than running the generated code directly.\n", - ")\n", - "\n", - "\n", - "def exec_python(cell):\n", - " ipython = get_ipython()\n", - " result = ipython.run_cell(cell)\n", - " log = str(result.result)\n", - " if result.error_before_exec is not None:\n", - " log += f\"\\n{result.error_before_exec}\"\n", - " if result.error_in_exec is not None:\n", - " log += f\"\\n{result.error_in_exec}\"\n", - " return log\n", - "\n", - "\n", - "def exec_sh(script):\n", - " return user_proxy.execute_code_blocks([(\"sh\", script)])\n", - "\n", - "\n", - "# register the functions\n", - "user_proxy.register_function(\n", - " function_map={\n", - " \"python\": exec_python,\n", - " \"sh\": exec_sh,\n", - " }\n", - ")\n", - "\n", - "# start the conversation\n", - "user_proxy.initiate_chat(\n", - " chatbot,\n", - " message=\"Draw two agents chatting with each other with an example dialog. Don't add plt.show().\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example 3\n", - "This example is from [agent_chat_web_info.ipynb](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_web_info.ipynb). \n", - "We use this example to demonstrate how to pass in a customized compression function. We pass in an compression function `constrain_num_messages`, which constrains the number of messages to be 3 or less. \n", - "The customized function should accept a list of messages as input and return a tuple of `(is_success: bool, compressed_messages: List[Dict])`." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "Show me the YTD gain of 10 largest technology companies as of today.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "To compute the Year-To-Date (YTD) gains of the 10 largest technology companies, I can fetch the latest stock price and the closing price from the last trading day of the previous year. Then calculate the percentage increase for each company.\n", - "\n", - "First, we should fetch the current stock prices and the closing prices as of the last trading day of the previous year for these companies. For this, we can use a financial data API like Alpha Vantage, Yahoo Finance, or similar, which would require an API key and internet access, but I can't perform actions that require internet access.\n", - "\n", - "Instead, I will provide you with Python code that you'd need to run on your local machine. This code utilizes the `yfinance` Python library, which is widely used for retrieving historical market data from Yahoo Finance. 
If you don't have `yfinance` installed, you'll need to install it by running `pip install yfinance` in your command line.\n", - "\n", - "Here is the code you'll need to execute:\n", - "\n", - "```python\n", - "# filename: ytd_gains.py\n", - "\n", - "import yfinance as yf\n", - "from datetime import datetime, timedelta\n", - "\n", - "# Define the ticker symbols for the 10 largest tech companies.\n", - "# This is a sample list and may not represent the current top 10 companies.\n", - "# You would need to replace this with the actual tickers of the top 10 tech companies.\n", - "tech_companies = [\"AAPL\", \"MSFT\", \"GOOGL\", \"AMZN\", \"FB\", \"TSLA\", \"NVDA\", \"V\", \"ADBE\", \"INTC\"]\n", - "\n", - "# Compute the last day of the last year\n", - "end_of_last_year = datetime(datetime.now().year - 1, 12, 31)\n", - "\n", - "# Retrieve the data and calculate YTD gain for each company\n", - "ytd_gains = {}\n", - "for symbol in tech_companies:\n", - " try:\n", - " # Fetch historical data\n", - " stock = yf.Ticker(symbol)\n", - " last_price = stock.history(period=\"1d\")['Close'][-1]\n", - " prev_close = stock.history(start=end_of_last_year, end=end_of_last_year + timedelta(days=1))['Close'][0]\n", - "\n", - " # Calculate YTD gain\n", - " ytd_gain = ((last_price - prev_close) / prev_close) * 100\n", - " ytd_gains[symbol] = ytd_gain\n", - " except Exception as e:\n", - " # Handle errors by skipping the company and printing an error message\n", - " print(f\"Error retrieving data for {symbol}: {e}\")\n", - "\n", - "# Print the YTD gains\n", - "for symbol, gain in ytd_gains.items():\n", - " print(f\"{symbol}: {gain:.2f}% YTD Gain\")\n", - "\n", - "```\n", - "\n", - "Make sure that `yfinance` is installed and then run this Python script (`ytd_gains.py`). The script will print out the YTD gains for the listed technology companies as a percentage.\n", - "\n", - "Note that the list of the 10 largest technology companies must be updated to reflect the current market situation. 
If you do not have the updated list, let me know, and I can attempt to retrieve this information for you using different methods.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Error retrieving data for AAPL: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for MSFT: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for GOOGL: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for AMZN: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for FB: index -1 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for TSLA: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for NVDA: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for V: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for ADBE: index 0 is out of bounds for axis 0 with size 0\n", - "Error retrieving data for INTC: index 0 is out of bounds for axis 0 with size 0\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "It seems that the script encountered an error when trying to fetch data for the provided ticker symbols. This might be because either the market is closed and the latest data is not yet available, or because the dates specified do not have available data (e.g., the end of last year might have been a weekend or holiday).\n", - "\n", - "Let's adjust the code to be more robust by checking for valid trading days around the end of last year and to handle cases where the stock data might not be available. 
Here's the updated code:\n", - "\n", - "```python\n", - "# filename: ytd_gains.py\n", - "\n", - "import yfinance as yf\n", - "from datetime import datetime\n", - "\n", - "def get_last_valid_trading_day(year):\n", - " last_day = datetime(year, 12, 31)\n", - " # Start moving backwards from December 31st to find the last valid trading day of the year\n", - " while True:\n", - " stock_data = yf.download('SPY', start=last_day.strftime('%Y-%m-%d'), end=(last_day + timedelta(days=1)).strftime('%Y-%m-%d'))\n", - " if not stock_data.empty:\n", - " return last_day\n", - " last_day -= timedelta(days=1)\n", - "\n", - "# Define the ticker symbols for the 10 largest tech companies.\n", - "tech_companies = [\"AAPL\", \"MSFT\", \"GOOGL\", \"AMZN\", \"FB\", \"TSLA\", \"NVDA\", \"V\", \"ADBE\", \"INTC\"]\n", - "\n", - "# Compute the last valid trading day of the last year\n", - "end_of_last_year = get_last_valid_trading_day(datetime.now().year - 1)\n", - "\n", - "# Retrieve the data and calculate YTD gain for each company\n", - "ytd_gains = {}\n", - "for symbol in tech_companies:\n", - " try:\n", - " # Fetch historical data\n", - " stock = yf.Ticker(symbol)\n", - " last_price = stock.history(period='1d')['Close'].iloc[-1]\n", - " prev_close = stock.history(start=end_of_last_year, end=end_of_last_year + timedelta(days=1))['Close'].iloc[0]\n", - "\n", - " # Calculate YTD gain\n", - " ytd_gain = ((last_price - prev_close) / prev_close) * 100\n", - " ytd_gains[symbol] = ytd_gain\n", - " except Exception as e:\n", - " # Handle errors by skipping the company and printing an error message\n", - " print(f\"Error retrieving data for {symbol}: {e}\")\n", - "\n", - "# Print the YTD gains\n", - "for symbol, gain in ytd_gains.items():\n", - " print(f\"{symbol}: {gain:.2f}% YTD Gain\")\n", - "```\n", - "\n", - "Please execute the updated `ytd_gains.py` script. The function `get_last_valid_trading_day` will find the last valid trading day of the previous year to handle non-trading days correctly, and the fetch for current price will get the latest available data point even if the market has not yet closed for today.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 1 (execution failed)\n", - "Code output: \n", - "Traceback (most recent call last):\n", - " File \"ytd_gains.py\", line 19, in \n", - " end_of_last_year = get_last_valid_trading_day(datetime.now().year - 1)\n", - " File \"ytd_gains.py\", line 10, in get_last_valid_trading_day\n", - " stock_data = yf.download('SPY', start=last_day.strftime('%Y-%m-%d'), end=(last_day + timedelta(days=1)).strftime('%Y-%m-%d'))\n", - "NameError: name 'timedelta' is not defined\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[35mToken Count (including 468 tokens from system msg and function descriptions). Before compression : 2115 | After: 1223\u001b[0m\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "It seems I forgot to import the `timedelta` class from the `datetime` module. I will correct this oversight and provide the updated code. 
Here is the corrected code including the missed import:\n", - "\n", - "```python\n", - "# filename: ytd_gains.py\n", - "\n", - "import yfinance as yf\n", - "from datetime import datetime, timedelta\n", - "\n", - "def get_last_valid_trading_day(year):\n", - " last_day = datetime(year, 12, 31)\n", - " # Start moving backwards from December 31st to find the last valid trading day of the year\n", - " while True:\n", - " stock_data = yf.download('SPY', start=last_day.strftime('%Y-%m-%d'), end=(last_day + timedelta(days=1)).strftime('%Y-%m-%d'))\n", - " if not stock_data.empty:\n", - " return last_day\n", - " last_day -= timedelta(days=1)\n", - "\n", - "# Define the ticker symbols for the 10 largest tech companies.\n", - "tech_companies = [\"AAPL\", \"MSFT\", \"GOOGL\", \"AMZN\", \"FB\", \"TSLA\", \"NVDA\", \"V\", \"ADBE\", \"INTC\"]\n", - "\n", - "# Compute the last valid trading day of the last year\n", - "end_of_last_year = get_last_valid_trading_day(datetime.now().year - 1)\n", - "\n", - "# Retrieve the data and calculate YTD gain for each company\n", - "ytd_gains = {}\n", - "for symbol in tech_companies:\n", - " try:\n", - " # Fetch historical data\n", - " stock = yf.Ticker(symbol)\n", - " last_price = stock.history(period='1d')['Close'].iloc[-1]\n", - " prev_close = stock.history(start=end_of_last_year, end=end_of_last_year + timedelta(days=1))['Close'].iloc[0]\n", - "\n", - " # Calculate YTD gain\n", - " ytd_gain = ((last_price - prev_close) / prev_close) * 100\n", - " ytd_gains[symbol] = ytd_gain\n", - " except Exception as e:\n", - " # Handle errors by skipping the company and printing an error message\n", - " print(f\"Error retrieving data for {symbol}: {e}\")\n", - "\n", - "# Print the YTD gains\n", - "for symbol, gain in ytd_gains.items():\n", - " print(f\"{symbol}: {gain:.2f}% YTD Gain\")\n", - "```\n", - "\n", - "Please execute the corrected `ytd_gains.py` script. This should resolve the `NameError` and provide you with the YTD gain for each company.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Error retrieving data for FB: single positional indexer is out-of-bounds\n", - "AAPL: -6.57% YTD Gain\n", - "MSFT: 10.70% YTD Gain\n", - "GOOGL: -1.83% YTD Gain\n", - "AMZN: 17.30% YTD Gain\n", - "TSLA: -18.45% YTD Gain\n", - "NVDA: 66.15% YTD Gain\n", - "V: 8.96% YTD Gain\n", - "ADBE: -4.30% YTD Gain\n", - "INTC: -12.54% YTD Gain\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[35mToken Count (including 468 tokens from system msg and function descriptions). Before compression : 1865 | After: 1133\u001b[0m\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "It looks like an error occurred while retrieving data for FB, which stands for Facebook. This can happen if the ticker symbol has changed or if there was an issue with the data for the date range we were trying to fetch. Facebook, Inc. 
has been renamed to Meta Platforms, Inc., and its stock symbol is now \"META.\" \n", - "\n", - "I will update the list of technology companies to include \"META\" instead of \"FB\" and run the code again:\n", - "\n", - "```python\n", - "# filename: ytd_gains_updated.py\n", - "\n", - "import yfinance as yf\n", - "from datetime import datetime, timedelta\n", - "\n", - "def get_last_valid_trading_day(year):\n", - " last_day = datetime(year, 12, 31)\n", - " # Start moving backwards from December 31st to find the last valid trading day of the year\n", - " while True:\n", - " stock_data = yf.download('SPY', start=last_day.strftime('%Y-%m-%d'), end=(last_day + timedelta(days=1)).strftime('%Y-%m-%d'))\n", - " if not stock_data.empty:\n", - " return last_day\n", - " last_day -= timedelta(days=1)\n", - "\n", - "# Define the ticker symbols for the 10 largest tech companies with FB replaced by META.\n", - "tech_companies = [\"AAPL\", \"MSFT\", \"GOOGL\", \"AMZN\", \"META\", \"TSLA\", \"NVDA\", \"V\", \"ADBE\", \"INTC\"]\n", - "\n", - "# Compute the last valid trading day of the last year\n", - "end_of_last_year = get_last_valid_trading_day(datetime.now().year - 1)\n", - "\n", - "# Retrieve the data and calculate YTD gain for each company\n", - "ytd_gains = {}\n", - "for symbol in tech_companies:\n", - " try:\n", - " # Fetch historical data\n", - " stock = yf.Ticker(symbol)\n", - " last_price = stock.history(period='1d')['Close'].iloc[-1]\n", - " prev_close = stock.history(start=end_of_last_year, end=end_of_last_year + timedelta(days=1))['Close'].iloc[0]\n", - "\n", - " # Calculate YTD gain\n", - " ytd_gain = ((last_price - prev_close) / prev_close) * 100\n", - " ytd_gains[symbol] = ytd_gain\n", - " except Exception as e:\n", - " # Handle errors by skipping the company and printing an error message\n", - " print(f\"Error retrieving data for {symbol}: {e}\")\n", - "\n", - "# Print the YTD gains\n", - "for symbol, gain in ytd_gains.items():\n", - " print(f\"{symbol}: {gain:.2f}% YTD Gain\")\n", - "```\n", - "\n", - "Please execute the updated `ytd_gains_updated.py` script to get the YTD gain of Meta Platforms, Inc. along with the other technology companies.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", - "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "AAPL: -6.57% YTD Gain\n", - "MSFT: 10.70% YTD Gain\n", - "GOOGL: -1.83% YTD Gain\n", - "AMZN: 17.30% YTD Gain\n", - "META: 42.06% YTD Gain\n", - "TSLA: -18.45% YTD Gain\n", - "NVDA: 66.15% YTD Gain\n", - "V: 8.96% YTD Gain\n", - "ADBE: -4.30% YTD Gain\n", - "INTC: -12.54% YTD Gain\n", - "\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[35mToken Count (including 468 tokens from system msg and function descriptions). Before compression : 1828 | After: 1186\u001b[0m\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", - "\n", - "The YTD (Year-To-Date) gain for the 10 largest technology companies as of today are as follows:\n", - "\n", - "1. Apple Inc. (AAPL): -6.57% YTD Gain\n", - "2. Microsoft Corporation (MSFT): 10.70% YTD Gain\n", - "3. Alphabet Inc. (GOOGL): -1.83% YTD Gain\n", - "4. Amazon.com, Inc. 
(AMZN): 17.30% YTD Gain\n", - "5. Meta Platforms, Inc. (META, formerly FB): 42.06% YTD Gain\n", - "6. Tesla, Inc. (TSLA): -18.45% YTD Gain\n", - "7. NVIDIA Corporation (NVDA): 66.15% YTD Gain\n", - "8. Visa Inc. (V): 8.96% YTD Gain\n", - "9. Adobe Inc. (ADBE): -4.30% YTD Gain\n", - "10. Intel Corporation (INTC): -12.54% YTD Gain\n", - "\n", - "These YTD gains reflect the percentage change in the stock price of each company from the last trading day of the previous year to the most recent available trading data.\n", - "\n", - "If you need any further assistance, please let me know.\n", - "\n", - "TERMINATE\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> NO HUMAN INPUT RECEIVED.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "ChatResult(chat_id=None, chat_history=[{'content': 'Show me the YTD gain of 10 largest technology companies as of today.', 'role': 'assistant'}, {'content': 'It looks like an error occurred while retrieving data for FB, which stands for Facebook. This can happen if the ticker symbol has changed or if there was an issue with the data for the date range we were trying to fetch. Facebook, Inc. has been renamed to Meta Platforms, Inc., and its stock symbol is now \"META.\" \\n\\nI will update the list of technology companies to include \"META\" instead of \"FB\" and run the code again:\\n\\n```python\\n# filename: ytd_gains_updated.py\\n\\nimport yfinance as yf\\nfrom datetime import datetime, timedelta\\n\\ndef get_last_valid_trading_day(year):\\n last_day = datetime(year, 12, 31)\\n # Start moving backwards from December 31st to find the last valid trading day of the year\\n while True:\\n stock_data = yf.download(\\'SPY\\', start=last_day.strftime(\\'%Y-%m-%d\\'), end=(last_day + timedelta(days=1)).strftime(\\'%Y-%m-%d\\'))\\n if not stock_data.empty:\\n return last_day\\n last_day -= timedelta(days=1)\\n\\n# Define the ticker symbols for the 10 largest tech companies with FB replaced by META.\\ntech_companies = [\"AAPL\", \"MSFT\", \"GOOGL\", \"AMZN\", \"META\", \"TSLA\", \"NVDA\", \"V\", \"ADBE\", \"INTC\"]\\n\\n# Compute the last valid trading day of the last year\\nend_of_last_year = get_last_valid_trading_day(datetime.now().year - 1)\\n\\n# Retrieve the data and calculate YTD gain for each company\\nytd_gains = {}\\nfor symbol in tech_companies:\\n try:\\n # Fetch historical data\\n stock = yf.Ticker(symbol)\\n last_price = stock.history(period=\\'1d\\')[\\'Close\\'].iloc[-1]\\n prev_close = stock.history(start=end_of_last_year, end=end_of_last_year + timedelta(days=1))[\\'Close\\'].iloc[0]\\n\\n # Calculate YTD gain\\n ytd_gain = ((last_price - prev_close) / prev_close) * 100\\n ytd_gains[symbol] = ytd_gain\\n except Exception as e:\\n # Handle errors by skipping the company and printing an error message\\n print(f\"Error retrieving data for {symbol}: {e}\")\\n\\n# Print the YTD gains\\nfor symbol, gain in ytd_gains.items():\\n print(f\"{symbol}: {gain:.2f}% YTD Gain\")\\n```\\n\\nPlease execute the updated `ytd_gains_updated.py` script to get the YTD gain of Meta Platforms, Inc. 
along with the other technology companies.', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\nAAPL: -6.57% YTD Gain\\nMSFT: 10.70% YTD Gain\\nGOOGL: -1.83% YTD Gain\\nAMZN: 17.30% YTD Gain\\nMETA: 42.06% YTD Gain\\nTSLA: -18.45% YTD Gain\\nNVDA: 66.15% YTD Gain\\nV: 8.96% YTD Gain\\nADBE: -4.30% YTD Gain\\nINTC: -12.54% YTD Gain\\n', 'role': 'assistant'}, {'content': 'The YTD (Year-To-Date) gain for the 10 largest technology companies as of today are as follows:\\n\\n1. Apple Inc. (AAPL): -6.57% YTD Gain\\n2. Microsoft Corporation (MSFT): 10.70% YTD Gain\\n3. Alphabet Inc. (GOOGL): -1.83% YTD Gain\\n4. Amazon.com, Inc. (AMZN): 17.30% YTD Gain\\n5. Meta Platforms, Inc. (META, formerly FB): 42.06% YTD Gain\\n6. Tesla, Inc. (TSLA): -18.45% YTD Gain\\n7. NVIDIA Corporation (NVDA): 66.15% YTD Gain\\n8. Visa Inc. (V): 8.96% YTD Gain\\n9. Adobe Inc. (ADBE): -4.30% YTD Gain\\n10. Intel Corporation (INTC): -12.54% YTD Gain\\n\\nThese YTD gains reflect the percentage change in the stock price of each company from the last trading day of the previous year to the most recent available trading data.\\n\\nIf you need any further assistance, please let me know.\\n\\nTERMINATE', 'role': 'user'}], summary='The YTD (Year-To-Date) gain for the 10 largest technology companies as of today are as follows:\\n\\n1. Apple Inc. (AAPL): -6.57% YTD Gain\\n2. Microsoft Corporation (MSFT): 10.70% YTD Gain\\n3. Alphabet Inc. (GOOGL): -1.83% YTD Gain\\n4. Amazon.com, Inc. (AMZN): 17.30% YTD Gain\\n5. Meta Platforms, Inc. (META, formerly FB): 42.06% YTD Gain\\n6. Tesla, Inc. (TSLA): -18.45% YTD Gain\\n7. NVIDIA Corporation (NVDA): 66.15% YTD Gain\\n8. Visa Inc. (V): 8.96% YTD Gain\\n9. Adobe Inc. (ADBE): -4.30% YTD Gain\\n10. Intel Corporation (INTC): -12.54% YTD Gain\\n\\nThese YTD gains reflect the percentage change in the stock price of each company from the last trading day of the previous year to the most recent available trading data.\\n\\nIf you need any further assistance, please let me know.\\n\\n', cost=({'total_cost': 0.31437, 'gpt-4': {'cost': 0.31437, 'prompt_tokens': 5401, 'completion_tokens': 2539, 'total_tokens': 7940}}, {'total_cost': 0.31437, 'gpt-4': {'cost': 0.31437, 'prompt_tokens': 5401, 'completion_tokens': 2539, 'total_tokens': 7940}}), human_input=[''])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def constrain_num_messages(messages):\n", - " \"\"\"Constrain the number of messages to 3.\n", - "\n", - " This is an example of a customized compression function.\n", - "\n", - " Returns:\n", - " bool: whether the compression is successful.\n", - " list: the compressed messages.\n", - " \"\"\"\n", - " if len(messages) <= 3:\n", - " # do nothing\n", - " return False, None\n", - "\n", - " # save the first and last two messages\n", - " return True, messages[:1] + messages[-2:]\n", - "\n", - "\n", - "# create a CompressibleAgent instance named \"assistant\"\n", - "assistant = CompressibleAgent(\n", - " name=\"assistant\",\n", - " llm_config={\n", - " \"timeout\": 600,\n", - " \"cache_seed\": 43,\n", - " \"config_list\": config_list,\n", - " \"model\": \"gpt-4-1106-preview\",\n", - " },\n", - " compress_config={\n", - " \"mode\": \"CUSTOMIZED\",\n", - " \"compress_function\": constrain_num_messages, # this is required for customized compression\n", - " \"trigger_count\": 1600,\n", - " },\n", - ")\n", - "\n", - "# create a UserProxyAgent instance named \"user_proxy\"\n", - "user_proxy = 
autogen.UserProxyAgent(\n", - " name=\"user_proxy\",\n", - " human_input_mode=\"TERMINATE\",\n", - " max_consecutive_auto_reply=10,\n", - " is_termination_msg=lambda x: x.get(\"content\", \"\").rstrip().endswith(\"TERMINATE\")\n", - " or x.get(\"content\", \"\").rstrip().endswith(\"TERMINATE.\"),\n", - " code_execution_config={\n", - " \"work_dir\": \"web\",\n", - " \"use_docker\": False,\n", - " }, # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n", - " system_message=\"\"\"Reply TERMINATE if the task has been solved at full satisfaction.\n", - "Otherwise, reply CONTINUE, or the reason why the task is not solved yet.\"\"\",\n", - ")\n", - "\n", - "user_proxy.initiate_chat(\n", - " assistant,\n", - " message=\"\"\"Show me the YTD gain of 10 largest technology companies as of today.\"\"\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebook/agentchat_groupchat_RAG.ipynb b/notebook/agentchat_groupchat_RAG.ipynb index 1057deabf92..e18bd99c151 100644 --- a/notebook/agentchat_groupchat_RAG.ipynb +++ b/notebook/agentchat_groupchat_RAG.ipynb @@ -35,14 +35,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "LLM models: ['gpt4-1106-preview', 'gpt-35-turbo', 'gpt-35-turbo-0613']\n" + "LLM models: ['gpt-35-turbo', 'gpt4-1106-preview', 'gpt-4o']\n" ] } ], @@ -75,18 +75,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/lijiang1/anaconda3/envs/autogen/lib/python3.10/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. 
Please use torch.utils._pytree.register_pytree_node instead.\n", - " torch.utils._pytree._register_pytree_node(\n" - ] - } - ], + "outputs": [], "source": [ "def termination_msg(x):\n", " return isinstance(x, dict) and \"TERMINATE\" == str(x.get(\"content\", \"\"))[-9:].upper()\n", @@ -205,15 +196,9 @@ " n_results: Annotated[int, \"number of results\"] = 3,\n", " ) -> str:\n", " boss_aid.n_results = n_results # Set the number of results to be retrieved.\n", - " # Check if we need to update the context.\n", - " update_context_case1, update_context_case2 = boss_aid._check_update_context(message)\n", - " if (update_context_case1 or update_context_case2) and boss_aid.update_context:\n", - " boss_aid.problem = message if not hasattr(boss_aid, \"problem\") else boss_aid.problem\n", - " _, ret_msg = boss_aid._generate_retrieve_user_reply(message)\n", - " else:\n", - " _context = {\"problem\": message, \"n_results\": n_results}\n", - " ret_msg = boss_aid.message_generator(boss_aid, None, _context)\n", - " return ret_msg if ret_msg else message\n", + " _context = {\"problem\": message, \"n_results\": n_results}\n", + " ret_msg = boss_aid.message_generator(boss_aid, None, _context)\n", + " return ret_msg or message\n", "\n", " boss_aid.human_input_mode = \"NEVER\" # Disable human input for boss_aid since it only retrieves content.\n", "\n", @@ -255,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -266,137 +251,130 @@ "\n", "How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", - "\n", - "To use Spark for parallel training in FLAML (Fast and Lightweight AutoML), you would need to set up a Spark cluster and utilize the `spark` backend for joblib, which FLAML uses internally for parallel training. Here’s an example of how you might set up and use Spark with FLAML for AutoML tasks:\n", - "\n", - "Firstly, ensure that you have the Spark cluster set up and the `pyspark` and `joblib-spark` packages installed in your environment. You can install the required packages using pip if they are not already installed:\n", + "--------------------------------------------------------------------------------\n", + "How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", - "```python\n", - "!pip install flaml pyspark joblib-spark\n", - "```\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "Here's a sample code snippet that demonstrates how to use FLAML with Spark for parallel training:\n", + "To use Spark for parallel training in FLAML, you need to install `pyspark` package and set up a Spark cluster. 
Here's some sample code for using Spark in FLAML:\n", "\n", "```python\n", "from flaml import AutoML\n", "from pyspark.sql import SparkSession\n", - "from sklearn.datasets import load_digits\n", - "from joblibspark import register_spark\n", "\n", - "# Initialize a Spark session\n", - "spark = SparkSession.builder \\\n", - " .master(\"local[*]\") \\\n", - " .appName(\"FLAML_Spark_Example\") \\\n", - " .getOrCreate()\n", + "# create a SparkSession\n", + "spark = SparkSession.builder.appName(\"FLAML-Spark\").getOrCreate()\n", "\n", - "# Register the joblib spark backend\n", - "register_spark() # This registers the backend for parallel processing\n", - "\n", - "# Load sample data\n", - "X, y = load_digits(return_X_y=True)\n", - "\n", - "# Initialize an AutoML instance\n", + "# create a FLAML AutoML object with Spark backend\n", "automl = AutoML()\n", "\n", - "# Define the settings for the AutoML run\n", + "# load data from Spark DataFrame\n", + "data = spark.read.format(\"csv\").option(\"header\", \"true\").load(\"data.csv\")\n", + "\n", + "# specify the target column and task type\n", "settings = {\n", - " \"time_budget\": 60, # Total running time in seconds\n", - " \"metric\": 'accuracy', # Primary metric for evaluation\n", - " \"task\": 'classification', # Task type\n", - " \"n_jobs\": -1, # Number of jobs to run in parallel (use -1 for all)\n", - " \"estimator_list\": ['lgbm', 'rf', 'xgboost'], # List of estimators to consider\n", - " \"log_file_name\": \"flaml_log.txt\", # Log file name\n", + " \"time_budget\": 60, # time budget in seconds\n", + " \"metric\": 'accuracy',\n", + " \"task\": 'classification',\n", "}\n", "\n", - "# Run the AutoML search with Spark backend\n", - "automl.fit(X_train=X, y_train=y, **settings)\n", + "# train and validate models in parallel using Spark\n", + "best_model = automl.fit(data, **settings)\n", "\n", - "# Output the best model and its performance\n", - "print(f\"Best ML model: {automl.model}\")\n", - "print(f\"Best ML model's accuracy: {automl.best_loss}\")\n", + "# print the best model and its metadata\n", + "print(automl.model_name)\n", + "print(automl.best_model)\n", + "print(automl.best_config)\n", "\n", - "# Stop the Spark session\n", + "# stop the SparkSession\n", "spark.stop()\n", + "\n", + "# terminate the code execution\n", + "TERMINATE\n", "```\n", "\n", - "The `register_spark()` function from `joblib-spark` is used to register the Spark backend with joblib, which is utilized for parallel training within FLAML. The `n_jobs=-1` parameter tells FLAML to use all available Spark executors for parallel training.\n", + "Note that this is just a sample code, you may need to modify it to fit your specific use case.\n", "\n", - "Please note that the actual process of setting up a Spark cluster can be complex and might involve additional steps such as configuring Spark workers, allocating resources, and more, which are beyond the scope of this code snippet.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Code_Reviewer\n", + "\u001b[0m\n", + "\u001b[33mCode_Reviewer\u001b[0m (to chat_manager):\n", "\n", - "If you encounter any issues or need to adjust configurations for your specific Spark setup, please refer to the Spark and FLAML documentation for more details.\n", "\n", - "When you run the code, ensure that your Spark cluster is properly configured and accessible from your Python environment. 
Adjust the `.master(\"local[*]\")` to point to your Spark master's URL if you are running a cluster that is not local.\n", "\n", "--------------------------------------------------------------------------------\n", - "To use Spark for parallel training in FLAML (Fast and Lightweight AutoML), you would need to set up a Spark cluster and utilize the `spark` backend for joblib, which FLAML uses internally for parallel training. Here’s an example of how you might set up and use Spark with FLAML for AutoML tasks:\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "Firstly, ensure that you have the Spark cluster set up and the `pyspark` and `joblib-spark` packages installed in your environment. You can install the required packages using pip if they are not already installed:\n", + "Do you have any questions related to the code sample?\n", "\n", - "```python\n", - "!pip install flaml pyspark joblib-spark\n", - "```\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "Here's a sample code snippet that demonstrates how to use FLAML with Spark for parallel training:\n", + "No, I don't have any questions related to the code sample.\n", "\n", - "```python\n", - "from flaml import AutoML\n", - "from pyspark.sql import SparkSession\n", - "from sklearn.datasets import load_digits\n", - "from joblibspark import register_spark\n", - "\n", - "# Initialize a Spark session\n", - "spark = SparkSession.builder \\\n", - " .master(\"local[*]\") \\\n", - " .appName(\"FLAML_Spark_Example\") \\\n", - " .getOrCreate()\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "# Register the joblib spark backend\n", - "register_spark() # This registers the backend for parallel processing\n", + "Great, let me know if you need any further assistance.\n", "\n", - "# Load sample data\n", - "X, y = load_digits(return_X_y=True)\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "# Initialize an AutoML instance\n", - "automl = AutoML()\n", + "Sure, will do. Thank you!\n", "\n", - "# Define the settings for the AutoML run\n", - "settings = {\n", - " \"time_budget\": 60, # Total running time in seconds\n", - " \"metric\": 'accuracy', # Primary metric for evaluation\n", - " \"task\": 'classification', # Task type\n", - " \"n_jobs\": -1, # Number of jobs to run in parallel (use -1 for all)\n", - " \"estimator_list\": ['lgbm', 'rf', 'xgboost'], # List of estimators to consider\n", - " \"log_file_name\": \"flaml_log.txt\", # Log file name\n", - "}\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "# Run the AutoML search with Spark backend\n", - "automl.fit(X_train=X, y_train=y, **settings)\n", + "You're welcome! 
Have a great day ahead!\n", "\n", - "# Output the best model and its performance\n", - "print(f\"Best ML model: {automl.model}\")\n", - "print(f\"Best ML model's accuracy: {automl.best_loss}\")\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "# Stop the Spark session\n", - "spark.stop()\n", - "```\n", + "You too, have a great day ahead!\n", "\n", - "The `register_spark()` function from `joblib-spark` is used to register the Spark backend with joblib, which is utilized for parallel training within FLAML. The `n_jobs=-1` parameter tells FLAML to use all available Spark executors for parallel training.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "Please note that the actual process of setting up a Spark cluster can be complex and might involve additional steps such as configuring Spark workers, allocating resources, and more, which are beyond the scope of this code snippet.\n", + "Thank you! Goodbye!\n", "\n", - "If you encounter any issues or need to adjust configurations for your specific Spark setup, please refer to the Spark and FLAML documentation for more details.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", "\n", - "When you run the code, ensure that your Spark cluster is properly configured and accessible from your Python environment. Adjust the `.master(\"local[*]\")` to point to your Spark master's URL if you are running a cluster that is not local.\n", + "Goodbye!\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Code_Reviewer\n", + "\u001b[0m\n", "\u001b[33mCode_Reviewer\u001b[0m (to chat_manager):\n", "\n", "TERMINATE\n", @@ -420,38 +398,38 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "2024-04-07 18:26:04,562 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `groupchat`.\u001b[0m\n" + "Trying to create collection.\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Trying to create collection.\n" + "2024-08-14 06:59:09,583 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `groupchat`.\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2024-04-07 18:26:05,485 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 1 chunks.\u001b[0m\n", - "Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1\n", - "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" + "2024-08-14 06:59:09,902 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", + "2024-08-14 06:59:09,912 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No content embedding is provided. 
Will use the VectorDB's embedding function to generate the content embedding.\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "VectorDB returns doc_ids: [['bdfbc921']]\n", + "VectorDB returns doc_ids: [['bdfbc921', 'b2c1ec51', '0e57e70f']]\n", "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc b2c1ec51 to context.\u001b[0m\n", "\u001b[33mBoss_Assistant\u001b[0m (to chat_manager):\n", "\n", "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", @@ -595,10 +573,90 @@ "```\n", "\n", "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", + "# Integrate - Spark\n", + "\n", + "FLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\n", + "\n", + "- Use Spark ML estimators for AutoML.\n", + "- Use Spark to run training in parallel spark jobs.\n", + "\n", + "## Spark ML Estimators\n", + "\n", + "FLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\n", + "\n", + "### Data\n", + "\n", + "For Spark estimators, AutoML only consumes Spark data. FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\n", + "\n", + "This utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\n", + "\n", + "This function also accepts optional arguments `index_col` and `default_index_type`.\n", + "\n", + "- `index_col` is the column name to use as the index, default is None.\n", + "- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\n", + "\n", + "Here is an example code snippet for Spark Data:\n", + "\n", + "```python\n", + "import pandas as pd\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", + "\n", + "# Creating a dictionary\n", + "data = {\n", + " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", + " \"Age_Years\": [20, 15, 10, 7, 25],\n", + " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", + "}\n", + "\n", + "# Creating a pandas DataFrame\n", + "dataframe = pd.DataFrame(data)\n", + "label = \"Price\"\n", + "\n", + "# Convert to pandas-on-spark dataframe\n", + "psdf = to_pandas_on_spark(dataframe)\n", + "```\n", + "\n", + "To use Spark ML models you need to format your data appropriately. 
Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\n", + "\n", + "Here is an example of how to use it:\n", + "\n", + "```python\n", + "from pyspark.ml.feature import VectorAssembler\n", + "\n", + "columns = psdf.columns\n", + "feature_cols = [col for col in columns if col != label]\n", + "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", + "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", + "```\n", + "\n", + "Later in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\n", + "\n", + "### Estimators\n", + "\n", + "#### Model List\n", + "\n", + "- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\n", + "\n", + "#### Usage\n", + "\n", + "First, prepare your data in the required format as described in the previous section.\n", + "\n", + "By including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven't specified them.\n", + "\n", + "Here is an example code snippet using SparkML models in AutoML:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "# prepare your data in pandas-on-spark format as we previously mentioned\n", "\n", "\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[32mAdding content of doc b2c1ec51 to context.\u001b[0m\n", "\u001b[33mBoss_Assistant\u001b[0m (to chat_manager):\n", "\n", "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", @@ -742,58 +800,188 @@ "```\n", "\n", "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", + "# Integrate - Spark\n", "\n", + "FLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\n", "\n", + "- Use Spark ML estimators for AutoML.\n", + "- Use Spark to run training in parallel spark jobs.\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", + "## Spark ML Estimators\n", + "\n", + "FLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\n", + "\n", + "### Data\n", + "\n", + "For Spark estimators, AutoML only consumes Spark data. FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\n", + "\n", + "This utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. 
It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\n", + "\n", + "This function also accepts optional arguments `index_col` and `default_index_type`.\n", + "\n", + "- `index_col` is the column name to use as the index, default is None.\n", + "- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\n", + "\n", + "Here is an example code snippet for Spark Data:\n", "\n", "```python\n", - "from flaml.automl import AutoML\n", - "from flaml.automl.spark.utils import to_pandas_on_spark\n", - "from pyspark.ml.feature import VectorAssembler\n", "import pandas as pd\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", "\n", - "# Sample data in a dictionary\n", + "# Creating a dictionary\n", "data = {\n", " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", " \"Age_Years\": [20, 15, 10, 7, 25],\n", " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", "}\n", "\n", - "# Convert dictionary to pandas DataFrame\n", + "# Creating a pandas DataFrame\n", "dataframe = pd.DataFrame(data)\n", "label = \"Price\"\n", "\n", - "# Convert pandas DataFrame to pandas-on-spark DataFrame\n", + "# Convert to pandas-on-spark dataframe\n", "psdf = to_pandas_on_spark(dataframe)\n", + "```\n", + "\n", + "To use Spark ML models you need to format your data appropriately. Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\n", + "\n", + "Here is an example of how to use it:\n", + "\n", + "```python\n", + "from pyspark.ml.feature import VectorAssembler\n", "\n", - "# Use VectorAssembler to merge feature columns into a single vector column\n", - "feature_cols = [col for col in psdf.columns if col != label]\n", + "columns = psdf.columns\n", + "feature_cols = [col for col in columns if col != label]\n", "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", - "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\", label]\n", + "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", + "```\n", + "\n", + "Later in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\n", + "\n", + "### Estimators\n", + "\n", + "#### Model List\n", + "\n", + "- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\n", + "\n", + "#### Usage\n", + "\n", + "First, prepare your data in the required format as described in the previous section.\n", + "\n", + "By including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. 
If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven't specified them.\n", + "\n", + "Here is an example code snippet using SparkML models in AutoML:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "# prepare your data in pandas-on-spark format as we previously mentioned\n", + "\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "\n", + "# Assuming psdf is the pandas-on-spark dataframe and label is the name of the target variable\n", + "# Presuming that the data conversion and feature vectorization have been done as shown in the context\n", "\n", - "# Initialize AutoML instance\n", "automl = AutoML()\n", "\n", - "# AutoML settings\n", - "automl_settings = {\n", - " \"time_budget\": 30, # Total running time in seconds\n", - " \"metric\": \"r2\", # Evaluation metric\n", - " \"task\": \"regression\",\n", - " \"n_concurrent_trials\": 2, # Number of concurrent Spark jobs\n", - " \"use_spark\": True, # Enable Spark for parallel training\n", - " \"force_cancel\": True, # Force cancel Spark jobs if they exceed the time budget\n", - " \"estimator_list\": [\"lgbm_spark\"] # Optional: Specific estimator to use\n", + "settings = {\n", + " \"time_budget\": 120, # for example, set the time budget to 2 minutes\n", + " \"metric\": \"accuracy\", # assuming a classification problem, change to \"r2\" for regression\n", + " \"estimator_list\": [\"lgbm_spark\"], # specify the Spark estimator\n", + " \"task\": \"classification\", # assuming a classification problem, change to \"regression\" for regression\n", + " \"n_concurrent_trials\": 2, # number of concurrent Spark jobs\n", + " \"use_spark\": True, # enable distributed training using Spark\n", "}\n", "\n", - "# Run AutoML fit with pandas-on-spark dataframe\n", - "automl.fit(\n", - " dataframe=psdf,\n", - " label=label,\n", - " **automl_settings,\n", - ")\n", + "automl.fit(dataframe=psdf, label=label, **settings)\n", + "```\n", + "Please adjust the `metric`, `task`, and other settings according to your specific problem and requirements. This code snippet sets up FLAML with Spark for parallel training using the LightGBM Spark estimator, with two concurrent trials. 
Make sure your Spark environment is properly configured to run the distributed training.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n", + "\u001b[33mSenior_Python_Engineer\u001b[0m (to chat_manager):\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "\n", + "# Assuming psdf is the pandas-on-spark dataframe and label is the name of the target variable\n", + "# Presuming that the data conversion and feature vectorization have been done as shown in the context\n", + "\n", + "automl = AutoML()\n", + "\n", + "settings = {\n", + " \"time_budget\": 120, # for example, set the time budget to 2 minutes\n", + " \"metric\": \"accuracy\", # assuming a classification problem, change to \"r2\" for regression\n", + " \"estimator_list\": [\"lgbm_spark\"], # specify the Spark estimator\n", + " \"task\": \"classification\", # assuming a classification problem, change to \"regression\" for regression\n", + " \"n_concurrent_trials\": 2, # number of concurrent Spark jobs\n", + " \"use_spark\": True, # enable distributed training using Spark\n", + "}\n", + "\n", + "automl.fit(dataframe=psdf, label=label, **settings)\n", + "```\n", + "Please adjust the `metric`, `task`, and other settings according to your specific problem and requirements. This code snippet sets up FLAML with Spark for parallel training using the LightGBM Spark estimator, with two concurrent trials. Make sure your Spark environment is properly configured to run the distributed training.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Code_Reviewer\n", + "\u001b[0m\n", + "\u001b[33mCode_Reviewer\u001b[0m (to chat_manager):\n", + "\n", + "The provided code snippet is mostly correct and follows the guidelines provided in the context. 
However, there is one minor issue: if we are using the pandas-on-spark DataFrame `psdf`, the `fit` method should be called with `dataframe` and `label` arguments, not `X_train` and `y_train`.\n", + "\n", + "This is because, with FLAML and Spark integration, the `fit` method expects the entire data as a single pandas-on-spark DataFrame along with the name of the target variable as `label`, rather than being provided with separate feature and target data as it would expect with standard pandas DataFrames.\n", + "\n", + "Here's the correct code snippet reflecting this:\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "\n", + "# Assuming psdf is the pandas-on-spark dataframe and label is the name of the target variable\n", + "# Presuming that the data conversion and feature vectorization have been done as shown in the context\n", + "\n", + "automl = AutoML()\n", + "\n", + "settings = {\n", + " \"time_budget\": 120, # for example, set the time budget to 2 minutes\n", + " \"metric\": \"accuracy\", # assuming a classification problem, change to \"r2\" for regression\n", + " \"estimator_list\": [\"lgbm_spark\"], # specify the Spark estimator\n", + " \"task\": \"classification\", # assuming a classification problem, change to \"regression\" for regression\n", + " \"n_concurrent_trials\": 2, # number of concurrent Spark jobs\n", + " \"use_spark\": True, # enable distributed training using Spark\n", + "}\n", + "\n", + "# Use dataframe and label parameters to fit the model\n", + "automl.fit(dataframe=psdf, label=label, **settings)\n", "```\n", + "\n", + "Please ensure that your Spark cluster is correctly configured to support distributed training, and adjust the `metric`, `task`, and other settings as needed for your specific use case.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Boss_Assistant\n", + "\u001b[0m\n", + "\u001b[33mBoss_Assistant\u001b[0m (to chat_manager):\n", + "\n", + "Reply `TERMINATE` if the task is done.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", + "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", + "\n", "TERMINATE\n", "\n", "--------------------------------------------------------------------------------\n" @@ -816,7 +1004,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -828,14 +1016,26 @@ "How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", "\u001b[32m***** Suggested function call: retrieve_content *****\u001b[0m\n", "Arguments: \n", - "{\"message\":\"using Apache Spark for parallel training in FLAML with sample code\"}\n", + "{\"message\":\"How to use spark for parallel training in FLAML? 
Give me sample code.\",\"n_results\":3}\n", "\u001b[32m*****************************************************\u001b[0m\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Boss\n", + "\u001b[0m\n", "\u001b[35m\n", ">>>>>>>> EXECUTING FUNCTION retrieve_content...\u001b[0m\n" ] @@ -844,16 +1044,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1\n", - "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" + "2024-08-14 07:09:05,717 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `groupchat`.\u001b[0m\n", + "2024-08-14 07:09:05,845 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "VectorDB returns doc_ids: [['bdfbc921']]\n", + "Trying to create collection.\n", + "VectorDB returns doc_ids: [['bdfbc921', 'b2c1ec51', '0e57e70f']]\n", "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc b2c1ec51 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc 0e57e70f to context.\u001b[0m\n", "\u001b[33mBoss\u001b[0m (to chat_manager):\n", "\n", "\u001b[32m***** Response from calling function (retrieve_content) *****\u001b[0m\n", @@ -867,7 +1070,7 @@ "# your code\n", "```\n", "\n", - "User's question is: using Apache Spark for parallel training in FLAML with sample code\n", + "User's question is: How to use spark for parallel training in FLAML? Give me sample code.\n", "\n", "Context is: # Integrate - Spark\n", "\n", @@ -998,27 +1201,7 @@ "```\n", "\n", "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", - "\n", - "\n", - "\u001b[32m*************************************************************\u001b[0m\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mBoss\u001b[0m (to chat_manager):\n", - "\n", - "\u001b[32m***** Response from calling function (retrieve_content) *****\u001b[0m\n", - "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", - "context provided by the user.\n", - "If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\n", - "For code generation, you must obey the following rules:\n", - "Rule 1. You MUST NOT install any packages because all the packages needed are already installed.\n", - "Rule 2. You must follow the formats below to write your code:\n", - "```language\n", - "# your code\n", - "```\n", - "\n", - "User's question is: using Apache Spark for parallel training in FLAML with sample code\n", - "\n", - "Context is: # Integrate - Spark\n", + "# Integrate - Spark\n", "\n", "FLAML has integrated Spark for distributed training. 
There are two main aspects of integration with Spark:\n", "\n", @@ -1094,7 +1277,6 @@ "import flaml\n", "\n", "# prepare your data in pandas-on-spark format as we previously mentioned\n", - "\n", "automl = flaml.AutoML()\n", "settings = {\n", " \"time_budget\": 30,\n", @@ -1152,84 +1334,87 @@ "\u001b[32m*************************************************************\u001b[0m\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Product_Manager\n", + "\u001b[0m\n", "\u001b[33mProduct_Manager\u001b[0m (to chat_manager):\n", "\n", - "To use Apache Spark for parallel training in FLAML, you can follow these steps:\n", + "To use Spark for parallel training in FLAML, follow these steps:\n", "\n", - "1. Ensure your data is in the required pandas-on-spark format.\n", - "2. Use Spark ML estimators by including them in the `estimator_list`.\n", - "3. Set `use_spark` to `True` for parallel tuning.\n", + "## Steps:\n", "\n", - "Here's a sample code demonstrating how to use Spark for parallel training in FLAML:\n", + "1. **Prepare Your Data:**\n", + " Convert your data into a pandas-on-spark DataFrame using `to_pandas_on_spark` function.\n", + "\n", + "2. **Configure Spark Settings:**\n", + " Set the `use_spark` parameter to `True` to enable Spark for parallel training jobs.\n", + "\n", + "3. **Run the AutoML Experiment:**\n", + " Configure the AutoML settings and run the experiment.\n", + "\n", + "## Sample Code:\n", "\n", "```python\n", + "import pandas as pd\n", "import flaml\n", "from flaml.automl.spark.utils import to_pandas_on_spark\n", - "import pandas as pd\n", - "from pyspark.ml.feature import VectorAssembler\n", "\n", - "# Sample data in a pandas DataFrame\n", + "# Prepare your data\n", "data = {\n", " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", " \"Age_Years\": [20, 15, 10, 7, 25],\n", " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", "}\n", - "label = \"Price\"\n", "\n", - "# Creating a pandas DataFrame\n", "dataframe = pd.DataFrame(data)\n", + "label = \"Price\"\n", "\n", "# Convert to pandas-on-spark dataframe\n", "psdf = to_pandas_on_spark(dataframe)\n", "\n", - "# Prepare features using VectorAssembler\n", + "# Use VectorAssembler to format data for Spark ML\n", + "from pyspark.ml.feature import VectorAssembler\n", + "\n", "columns = psdf.columns\n", "feature_cols = [col for col in columns if col != label]\n", "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", "\n", - "# Initialize AutoML\n", + "# Configure AutoML settings\n", "automl = flaml.AutoML()\n", - "\n", - "# Configure settings for AutoML\n", - "settings = {\n", - " \"time_budget\": 30, # time budget in seconds\n", + "automl_settings = {\n", + " \"time_budget\": 30,\n", " \"metric\": \"r2\",\n", - " \"estimator_list\": [\"lgbm_spark\"], # using Spark ML estimators\n", " \"task\": \"regression\",\n", - " \"n_concurrent_trials\": 2, # number of parallel trials\n", - " \"use_spark\": True, # enable parallel training using Spark\n", - " \"force_cancel\": True, # force cancel Spark jobs if time_budget is exceeded\n", + " \"n_concurrent_trials\": 2,\n", + " \"use_spark\": True,\n", + " \"force_cancel\": True, # Optionally force cancel jobs that exceed time budget\n", "}\n", "\n", - "# Start the training\n", - "automl.fit(dataframe=psdf, label=label, **settings)\n", + "# Run the AutoML experiment\n", + "automl.fit(\n", + " 
dataframe=psdf,\n", + " label=label,\n", + " **automl_settings,\n", + ")\n", "```\n", "\n", - "In this code snippet:\n", - "- The `to_pandas_on_spark` function is used to convert the pandas DataFrame to a pandas-on-spark DataFrame.\n", - "- `VectorAssembler` is used to transform feature columns into a single vector column.\n", - "- The `AutoML` object is created, and settings are configured for the AutoML run, including setting `use_spark` to `True` for parallel training.\n", - "- The `fit` method is called to start the automated machine learning process.\n", + "This code demonstrates how to prepare your data, configure Spark settings for parallel training, and run the AutoML experiment using FLAML with Spark.\n", "\n", - "By using these settings, FLAML will train the models in parallel using Spark, which can accelerate the training process on large models and datasets.\n", + "You can find more information and examples in the [FLAML documentation](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb).\n", "\n", "TERMINATE\n", "\n", - "--------------------------------------------------------------------------------\n" + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: Senior_Python_Engineer\n", + "\u001b[0m\n" ] } ], "source": [ "call_rag_chat()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1256,7 +1441,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/notebook/agentchat_microsoft_fabric.ipynb b/notebook/agentchat_microsoft_fabric.ipynb index 97cab73b4ea..8e128d733e6 100644 --- a/notebook/agentchat_microsoft_fabric.ipynb +++ b/notebook/agentchat_microsoft_fabric.ipynb @@ -20,7 +20,7 @@ "\n", "In this notebook, we demonstrate several examples:\n", "- 1. How to use `AssistantAgent` and `UserProxyAgent` to write code and execute the code.\n", - "- 2. How to use `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", + "- 2. How to use `AssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", "- 3. How to use `MultimodalConversableAgent` to chat with images.\n", "\n", "### Requirements\n", @@ -139,6 +139,7 @@ " }\n", " return config_list, llm_config\n", "\n", + "\n", "config_list, llm_config = get_config_list()\n", "\n", "assert len(config_list) > 0\n", @@ -401,7 +402,7 @@ }, "source": [ "### Example 2\n", - "How to use `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", + "How to use `AssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", "\n", "Check out this [blog](https://microsoft.github.io/autogen/blog/2023/10/18/RetrieveChat) for more details." ] @@ -479,11 +480,11 @@ }, "outputs": [], "source": [ - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen import AssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "\n", - "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", - "assistant = RetrieveAssistantAgent(\n", + "# 1. 
create an AssistantAgent instance named \"assistant\"\n", + "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " system_message=\"You are a helpful assistant.\",\n", " llm_config=llm_config,\n", diff --git a/notebook/lats_search.ipynb b/notebook/lats_search.ipynb new file mode 100644 index 00000000000..01b4449890e --- /dev/null +++ b/notebook/lats_search.ipynb @@ -0,0 +1,1059 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "211913e6", + "metadata": {}, + "source": [ + "# Language Agent Tree Search\n", + "\n", + "[Language Agent Tree Search](https://arxiv.org/abs/2310.04406) (LATS), by Zhou, et. al, is a general LLM agent search algorithm that combines reflection/evaluation and search (specifically Monte-Carlo tree search) to achieve stronger overall task performance by leveraging inference-time compute.\n", + "\n", + "It has four main phases consisting of six steps:\n", + "\n", + "1. Select: pick the best next state to progress from, based on its aggregate value. \n", + "2. Expand and simulate: sample n potential actions to take and execute them in parallel.\n", + "3. Reflect + Evaluate: observe the outcomes of these actions and score the decisions based on reflection (and possibly external feedback if available)\n", + "4. Backpropagate: update the scores of the root trajectories based on the outcomes.\n", + "\n", + "![lats](https://i.postimg.cc/NjQScLTv/image.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da705b29", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import logging\n", + "import os\n", + "import uuid\n", + "from typing import Any, Dict, List\n", + "\n", + "from autogen import AssistantAgent, ConversableAgent, GroupChat, UserProxyAgent, config_list_from_json" + ] + }, + { + "cell_type": "markdown", + "id": "293fd23b", + "metadata": {}, + "source": [ + "# Configure logging\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a02f8a2c", + "metadata": {}, + "outputs": [], + "source": [ + "logging.basicConfig(level=logging.INFO)" + ] + }, + { + "cell_type": "markdown", + "id": "1d5ca06b", + "metadata": {}, + "source": [ + "# Set environment variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1566c7df", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"AUTOGEN_USE_DOCKER\"] = \"0\" # Disable Docker usage globally for Autogen\n", + "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\"" + ] + }, + { + "cell_type": "markdown", + "id": "585654ac", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "Install `autogen` (for the LLM framework and agents)\n", + "\n", + "Required packages: autogen\n", + "\n", + "Please ensure these packages are installed before running this script" + ] + }, + { + "cell_type": "markdown", + "id": "586bcf0f", + "metadata": {}, + "source": [ + "# Directly create the config_list with the API key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9eaf711f", + "metadata": {}, + "outputs": [], + "source": [ + "config_list = [{\"model\": \"gpt-4o-mini\", \"api_key\": \"YOUR_API_KEY\"}]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79701018", + "metadata": {}, + "outputs": [], + "source": [ + "if not config_list:\n", + " raise ValueError(\"Failed to create configuration. 
Please check the API key.\")" + ] + }, + { + "cell_type": "markdown", + "id": "9041e0a3", + "metadata": {}, + "source": [ + "### Reflection Class\n", + "\n", + "The reflection chain will score agent outputs based on the decision and the tool responses." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce0288e9", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from pydantic import BaseModel, Field\n", + "\n", + "\n", + "class Reflection(BaseModel):\n", + " reflections: str = Field(\n", + " description=\"The critique and reflections on the sufficiency, superfluency,\"\n", + " \" and general quality of the response\"\n", + " )\n", + " score: int = Field(\n", + " description=\"Score from 0-10 on the quality of the candidate response.\",\n", + " gte=0,\n", + " lte=10,\n", + " )\n", + " found_solution: bool = Field(description=\"Whether the response has fully solved the question or task.\")\n", + "\n", + " def as_message(self):\n", + " return {\"role\": \"human\", \"content\": f\"Reasoning: {self.reflections}\\nScore: {self.score}\"}\n", + "\n", + " @property\n", + " def normalized_score(self) -> float:\n", + " return self.score / 10.0" + ] + }, + { + "cell_type": "markdown", + "id": "1f6d3476", + "metadata": {}, + "source": [ + "## Tree State\n", + "\n", + "LATS is based on a (greedy) Monte-Carlo tree search. For each search steps, it picks the node with the highest \"upper confidence bound\", which is a metric that balances exploitation (highest average reward) and exploration (lowest visits). Starting from that node, it generates N (5 in this case) new candidate actions to take, and adds them to the tree. It stops searching either when it has generated a valid solution OR when it has reached the maximum number of rollouts (search tree depth)." 
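+    "\n",
+    "As a rough, illustrative sketch (not part of the original LATS write-up), the upper confidence bound used for selection can be written as a tiny standalone helper. It mirrors the `upper_confidence_bound` method of the `Node` class defined below, with an exploration weight of 1.0 assumed:\n",
+    "\n",
+    "```python\n",
+    "import math\n",
+    "\n",
+    "def uct(total_value: float, visits: int, parent_visits: int, exploration_weight: float = 1.0) -> float:\n",
+    "    # Unvisited nodes simply return their raw value; no average reward is defined yet\n",
+    "    if visits == 0:\n",
+    "        return total_value\n",
+    "    average_reward = total_value / visits  # exploitation: prefer branches that paid off so far\n",
+    "    exploration = math.sqrt(math.log(parent_visits) / visits)  # exploration: prefer rarely visited branches\n",
+    "    return average_reward + exploration_weight * exploration\n",
+    "```\n",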
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6d0d7a6", + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import os\n", + "from collections import deque\n", + "from typing import Optional" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "305a29d6", + "metadata": {}, + "outputs": [], + "source": [ + "class Node:\n", + " def __init__(\n", + " self,\n", + " messages: List[Dict[str, str]],\n", + " reflection: Optional[Reflection] = None,\n", + " parent: Optional[\"Node\"] = None,\n", + " ):\n", + " self.messages = messages\n", + " self.parent = parent\n", + " self.children: List[\"Node\"] = []\n", + " self.value = 0.0\n", + " self.visits = 0\n", + " self.reflection = reflection\n", + " self.depth = parent.depth + 1 if parent is not None else 1\n", + " self._is_solved = reflection.found_solution if reflection else False\n", + " if self._is_solved:\n", + " self._mark_tree_as_solved()\n", + " if reflection:\n", + " self.backpropagate(reflection.normalized_score)\n", + "\n", + " def __repr__(self) -> str:\n", + " return (\n", + " f\"\"\n", + " )\n", + "\n", + " @property\n", + " def is_solved(self) -> bool:\n", + " \"\"\"If any solutions exist, we can end the search.\"\"\"\n", + " return self._is_solved\n", + "\n", + " @property\n", + " def is_terminal(self):\n", + " return not self.children\n", + "\n", + " @property\n", + " def best_child(self):\n", + " \"\"\"Select the child with the highest UCT to search next.\"\"\"\n", + " if not self.children:\n", + " return None\n", + " all_nodes = self._get_all_children()\n", + " return max(all_nodes, key=lambda child: child.upper_confidence_bound())\n", + "\n", + " @property\n", + " def best_child_score(self):\n", + " \"\"\"Return the child with the highest value.\"\"\"\n", + " if not self.children:\n", + " return None\n", + " return max(self.children, key=lambda child: int(child.is_solved) * child.value)\n", + "\n", + " @property\n", + " def height(self) -> int:\n", + " \"\"\"Check for how far we've rolled out the tree.\"\"\"\n", + " if self.children:\n", + " return 1 + max([child.height for child in self.children])\n", + " return 1\n", + "\n", + " def upper_confidence_bound(self, exploration_weight=1.0):\n", + " \"\"\"Return the UCT score. This helps balance exploration vs. 
exploitation of a branch.\"\"\"\n", + " if self.parent is None:\n", + " raise ValueError(\"Cannot obtain UCT from root node\")\n", + " if self.visits == 0:\n", + " return self.value\n", + " # Encourages exploitation of high-value trajectories\n", + " average_reward = self.value / self.visits\n", + " exploration_term = math.sqrt(math.log(self.parent.visits) / self.visits)\n", + " return average_reward + exploration_weight * exploration_term\n", + "\n", + " def backpropagate(self, reward: float):\n", + " \"\"\"Update the score of this node and its parents.\"\"\"\n", + " node = self\n", + " while node:\n", + " node.visits += 1\n", + " node.value = (node.value * (node.visits - 1) + reward) / node.visits\n", + " node = node.parent\n", + "\n", + " def get_messages(self, include_reflections: bool = True):\n", + " if include_reflections and self.reflection:\n", + " return self.messages + [self.reflection.as_message()]\n", + " return self.messages\n", + "\n", + " def get_trajectory(self, include_reflections: bool = True) -> List[Dict[str, str]]:\n", + " \"\"\"Get messages representing this search branch.\"\"\"\n", + " messages = []\n", + " node = self\n", + " while node:\n", + " messages.extend(node.get_messages(include_reflections=include_reflections)[::-1])\n", + " node = node.parent\n", + " # Reverse the final back-tracked trajectory to return in the correct order\n", + " return messages[::-1] # root solution, reflection, child 1, ...\n", + "\n", + " def _get_all_children(self):\n", + " all_nodes = []\n", + " nodes = deque()\n", + " nodes.append(self)\n", + " while nodes:\n", + " node = nodes.popleft()\n", + " all_nodes.extend(node.children)\n", + " for n in node.children:\n", + " nodes.append(n)\n", + " return all_nodes\n", + "\n", + " def get_best_solution(self):\n", + " \"\"\"Return the best solution from within the current sub-tree.\"\"\"\n", + " all_nodes = [self] + self._get_all_children()\n", + " best_node = max(\n", + " all_nodes,\n", + " # We filter out all non-terminal, non-solution trajectories\n", + " key=lambda node: int(node.is_terminal and node.is_solved) * node.value,\n", + " )\n", + " return best_node\n", + "\n", + " def _mark_tree_as_solved(self):\n", + " parent = self.parent\n", + " while parent:\n", + " parent._is_solved = True\n", + " parent = parent.parent" + ] + }, + { + "cell_type": "markdown", + "id": "98b719d9", + "metadata": {}, + "source": [ + "The main component is the tree, represented by the root node." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "586d953a", + "metadata": {}, + "outputs": [], + "source": [ + "from typing_extensions import TypedDict\n", + "\n", + "\n", + "class TreeState(TypedDict):\n", + " # The full tree\n", + " root: Node\n", + " # The original input\n", + " input: str" + ] + }, + { + "cell_type": "markdown", + "id": "3a61a6ee", + "metadata": {}, + "source": [ + "## Define Language Agent\n", + "\n", + "Our agent will have three primary LLM-powered processes:\n", + "\n", + "1. Reflect: score the action based on the tool response.\n", + "2. Initial response: to create the root node and start the search.\n", + "3. Expand: generate 5 candidate \"next steps\" from the best spot in the current tree\n", + "\n", + "For more \"Grounded\" tool applications (such as code synthesis), you could integrate code execution into the reflection/reward step. This type of external feedback is very useful." 
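+    "\n",
+    "For instance, one hypothetical way to fold such external feedback into the reward (assumed here, not implemented in this notebook) is to execute a candidate snippet together with its tests and blend the pass/fail signal with the self-reflection score:\n",
+    "\n",
+    "```python\n",
+    "import subprocess\n",
+    "import sys\n",
+    "import tempfile\n",
+    "\n",
+    "def external_feedback(candidate_code: str, test_code: str, timeout: int = 30) -> float:\n",
+    "    # Run the candidate plus its tests in a fresh interpreter; 1.0 if everything passes, else 0.0\n",
+    "    with tempfile.NamedTemporaryFile(\"w\", suffix=\".py\", delete=False) as f:\n",
+    "        f.write(candidate_code + \"\\n\\n\" + test_code)\n",
+    "        path = f.name\n",
+    "    result = subprocess.run([sys.executable, path], capture_output=True, timeout=timeout)\n",
+    "    return 1.0 if result.returncode == 0 else 0.0\n",
+    "\n",
+    "def combined_reward(reflection_score: float, feedback: float, weight: float = 0.5) -> float:\n",
+    "    # Blend the normalized self-reflection score (0-1) with the external signal\n",
+    "    return (1 - weight) * reflection_score + weight * feedback\n",
+    "```\n",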
+ ] + }, + { + "cell_type": "markdown", + "id": "a9e6c27f", + "metadata": {}, + "source": [ + "#### Tools\n", + "For our example, we will give the language agent a search engine." + ] + }, + { + "cell_type": "markdown", + "id": "ffb10a00", + "metadata": {}, + "source": [ + "Define the UserProxyAgent with web search / tool-use capability\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e467f73e", + "metadata": {}, + "outputs": [], + "source": [ + "user_proxy = UserProxyAgent(\n", + " name=\"user\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=10,\n", + " code_execution_config={\n", + " \"work_dir\": \"web\",\n", + " \"use_docker\": False,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5c2b96b2", + "metadata": {}, + "source": [ + "Create a ConversableAgent without tools\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "212daaef", + "metadata": {}, + "outputs": [], + "source": [ + "assistant_agent = ConversableAgent(\n", + " name=\"assistant_agent\",\n", + " system_message=\"You are an AI assistant capable of helping with various tasks.\",\n", + " human_input_mode=\"NEVER\",\n", + " code_execution_config=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "527c1a39", + "metadata": {}, + "source": [ + "### Reflection\n", + "\n", + "Self-reflection allows the agent to boostrap, improving its future responses based on the outcome of previous ones. In agents this is more powerful since it can use external feedback to improve." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bdd8a23", + "metadata": {}, + "outputs": [], + "source": [ + "reflection_prompt = \"\"\"\n", + "Reflect and grade the assistant response to the user question below.\n", + "User question: {input}\n", + "Assistant response: {candidate}\n", + "\n", + "Provide your reflection in the following format:\n", + "Reflections: [Your detailed critique and reflections]\n", + "Score: [A score from 0-10]\n", + "Found Solution: [true/false]\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7750d32f", + "metadata": {}, + "outputs": [], + "source": [ + "reflection_agent = AssistantAgent(\n", + " name=\"reflection_agent\",\n", + " system_message=\"You are an AI assistant that reflects on and grades responses.\",\n", + " llm_config={\n", + " \"config_list\": config_list,\n", + " \"temperature\": 0.2,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23f26bf0", + "metadata": {}, + "outputs": [], + "source": [ + "def reflection_chain(inputs: Dict[str, Any]) -> Reflection:\n", + " try:\n", + " candidate_content = \"\"\n", + " if \"candidate\" in inputs:\n", + " candidate = inputs[\"candidate\"]\n", + " if isinstance(candidate, list):\n", + " candidate_content = (\n", + " candidate[-1][\"content\"]\n", + " if isinstance(candidate[-1], dict) and \"content\" in candidate[-1]\n", + " else str(candidate[-1])\n", + " )\n", + " elif isinstance(candidate, dict):\n", + " candidate_content = candidate.get(\"content\", str(candidate))\n", + " elif isinstance(candidate, str):\n", + " candidate_content = candidate\n", + " else:\n", + " candidate_content = str(candidate)\n", + "\n", + " formatted_prompt = [\n", + " {\"role\": \"system\", \"content\": \"You are an AI assistant that reflects on and grades responses.\"},\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": reflection_prompt.format(input=inputs.get(\"input\", \"\"), 
candidate=candidate_content),\n", + " },\n", + " ]\n", + " response = reflection_agent.generate_reply(formatted_prompt)\n", + "\n", + " # Parse the response\n", + " response_str = str(response)\n", + " lines = response_str.split(\"\\n\")\n", + " reflections = next((line.split(\": \", 1)[1] for line in lines if line.startswith(\"Reflections:\")), \"\")\n", + " score_str = next((line.split(\": \", 1)[1] for line in lines if line.startswith(\"Score:\")), \"0\")\n", + " try:\n", + " if \"/\" in score_str:\n", + " numerator, denominator = map(int, score_str.split(\"/\"))\n", + " score = int((numerator / denominator) * 10)\n", + " else:\n", + " score = int(score_str)\n", + " except ValueError:\n", + " logging.warning(f\"Invalid score value: {score_str}. Defaulting to 0.\")\n", + " score = 0\n", + "\n", + " found_solution = next(\n", + " (line.split(\": \", 1)[1].lower() == \"true\" for line in lines if line.startswith(\"Found Solution:\")), False\n", + " )\n", + "\n", + " if not reflections:\n", + " logging.warning(\"No reflections found in the response. Using default values.\")\n", + " reflections = \"No reflections provided.\"\n", + "\n", + " return Reflection(reflections=reflections, score=score, found_solution=found_solution)\n", + " except Exception as e:\n", + " logging.error(f\"Error in reflection_chain: {str(e)}\", exc_info=True)\n", + " return Reflection(reflections=f\"Error in reflection: {str(e)}\", score=0, found_solution=False)" + ] + }, + { + "cell_type": "markdown", + "id": "fc4b9911", + "metadata": {}, + "source": [ + "### Initial Response\n", + "\n", + "We start with a single root node, generated by this first step. It responds to the user input either with a tool invocation or a response." + ] + }, + { + "cell_type": "markdown", + "id": "60675131", + "metadata": {}, + "source": [ + "# Create Autogen agents\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd743ab5", + "metadata": {}, + "outputs": [], + "source": [ + "assistant = AssistantAgent(name=\"assistant\", llm_config={\"config_list\": config_list}, code_execution_config=False)\n", + "user = UserProxyAgent(\n", + " name=\"user\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=10,\n", + " code_execution_config={\"work_dir\": \"web\", \"use_docker\": False},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1f93b734", + "metadata": {}, + "source": [ + "# Define a function to create the initial prompt\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7e00575", + "metadata": {}, + "outputs": [], + "source": [ + "def create_initial_prompt(input_text):\n", + " return [\n", + " {\"role\": \"system\", \"content\": \"You are an AI assistant.\"},\n", + " {\"role\": \"user\", \"content\": input_text},\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "id": "b8442317", + "metadata": {}, + "source": [ + "# Function to generate initial response\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7afcd1b", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_initial_response(state: TreeState) -> TreeState:\n", + " chat_messages = create_initial_prompt(state[\"input\"])\n", + " try:\n", + " # Ensure chat_messages is a list of dictionaries\n", + " if not isinstance(chat_messages, list):\n", + " chat_messages = [{\"role\": \"user\", \"content\": chat_messages}]\n", + "\n", + " logging.info(f\"Generating initial response for input: {state['input']}\")\n", + " logging.debug(f\"Chat messages: {chat_messages}\")\n", + 
"\n", + " response = assistant.generate_reply(chat_messages)\n", + " logging.debug(f\"Raw response from assistant: {response}\")\n", + "\n", + " # Ensure response is properly formatted as a string\n", + " if isinstance(response, str):\n", + " content = response\n", + " elif isinstance(response, dict) and \"content\" in response:\n", + " content = response[\"content\"]\n", + " elif isinstance(response, list) and len(response) > 0:\n", + " content = response[-1].get(\"content\", str(response[-1]))\n", + " else:\n", + " content = str(response)\n", + "\n", + " content = content.strip()\n", + " if not content:\n", + " raise ValueError(\"Generated content is empty after processing\")\n", + "\n", + " logging.debug(f\"Processed content: {content[:100]}...\") # Log first 100 chars\n", + "\n", + " # Generate reflection\n", + " reflection_input = {\"input\": state[\"input\"], \"candidate\": content}\n", + " logging.info(\"Generating reflection on the initial response\")\n", + " reflection = reflection_chain(reflection_input)\n", + " logging.debug(f\"Reflection generated: {reflection}\")\n", + "\n", + " # Create Node with messages as a list containing a single dict\n", + " messages = [{\"role\": \"assistant\", \"content\": content}]\n", + " root = Node(messages=messages, reflection=reflection)\n", + "\n", + " logging.info(\"Initial response and reflection generated successfully\")\n", + " return TreeState(root=root, input=state[\"input\"])\n", + "\n", + " except Exception as e:\n", + " logging.error(f\"Error in generate_initial_response: {str(e)}\", exc_info=True)\n", + " return TreeState(root=None, input=state[\"input\"])" + ] + }, + { + "cell_type": "markdown", + "id": "87ef17ca", + "metadata": {}, + "source": [ + "# Example usage of the generate_initial_response function\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ab75669", + "metadata": {}, + "outputs": [], + "source": [ + "initial_prompt = \"Why is the sky blue?\"\n", + "initial_state = TreeState(input=initial_prompt, root=None)\n", + "result_state = generate_initial_response(initial_state)\n", + "if result_state[\"root\"] is not None:\n", + " print(result_state[\"root\"].messages[0][\"content\"])\n", + "else:\n", + " print(\"Failed to generate initial response.\")" + ] + }, + { + "cell_type": "markdown", + "id": "e619223f", + "metadata": {}, + "source": [ + "#### Starting Node\n", + "\n", + "We will package up the candidate generation and reflection in a single node of our graph. 
This is represented by the following function:" + ] + }, + { + "cell_type": "markdown", + "id": "24c052e0", + "metadata": {}, + "source": [ + "\n", + "# Define the function to generate the initial response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94c92498", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Define the function to generate the initial response\n", + "\n", + "\n", + "def generate_initial_response(state: TreeState) -> TreeState:\n", + " \"\"\"Generate the initial candidate response using Autogen components.\"\"\"\n", + " assistant = AssistantAgent(name=\"assistant\", llm_config={\"config_list\": config_list}, code_execution_config=False)\n", + "\n", + " # Generate initial response\n", + " initial_message = [\n", + " {\"role\": \"system\", \"content\": \"You are an AI assistant.\"},\n", + " {\"role\": \"user\", \"content\": state[\"input\"]},\n", + " ]\n", + "\n", + " try:\n", + " logging.info(f\"Generating initial response for input: {state['input']}\")\n", + " response = assistant.generate_reply(initial_message)\n", + " logging.debug(f\"Raw response from assistant: {response}\")\n", + "\n", + " # Ensure response is properly formatted as a string\n", + " if isinstance(response, str):\n", + " content = response\n", + " elif isinstance(response, dict):\n", + " content = response.get(\"content\", \"\")\n", + " if not content:\n", + " content = json.dumps(response)\n", + " elif isinstance(response, list):\n", + " content = \" \".join(str(item) for item in response)\n", + " else:\n", + " content = str(response)\n", + "\n", + " # Ensure content is always a string and not empty\n", + " content = content.strip()\n", + " if not content:\n", + " raise ValueError(\"Generated content is empty after processing\")\n", + "\n", + " logging.debug(f\"Final processed content (first 100 chars): {content[:100]}...\")\n", + "\n", + " # Generate reflection\n", + " logging.info(\"Generating reflection on the initial response\")\n", + " reflection_input = {\"input\": state[\"input\"], \"candidate\": content}\n", + " reflection = reflection_chain(reflection_input)\n", + " logging.debug(f\"Reflection generated: {reflection}\")\n", + "\n", + " if not isinstance(reflection, Reflection):\n", + " raise TypeError(f\"Invalid reflection type: {type(reflection)}. 
Expected Reflection, got {type(reflection)}\")\n", + "\n", + " # Create Node with messages as a list containing a single dict\n", + " messages = [{\"role\": \"assistant\", \"content\": content}]\n", + " logging.debug(f\"Creating Node with messages: {messages}\")\n", + " root = Node(messages=messages, reflection=reflection)\n", + " logging.info(\"Initial response and reflection generated successfully\")\n", + " logging.debug(f\"Created root node: {root}\")\n", + " return TreeState(root=root, input=state[\"input\"])\n", + "\n", + " except Exception as e:\n", + " logging.error(f\"Error in generate_initial_response: {str(e)}\", exc_info=True)\n", + " return TreeState(root=None, input=state[\"input\"])" + ] + }, + { + "cell_type": "markdown", + "id": "c58a4074", + "metadata": {}, + "source": [ + "### Candidate Generation\n", + "The following code prompts the same LLM to generate N additional candidates to check.\n", + "\n", + "This generates N candidate values for a single input to sample actions from the environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27a3a1db", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_candidates(messages: list, config: dict):\n", + " n = config.get(\"N\", 5)\n", + " assistant = AssistantAgent(name=\"assistant\", llm_config={\"config_list\": config_list}, code_execution_config=False)\n", + "\n", + " candidates = []\n", + " for _ in range(n):\n", + " try:\n", + " # Use the assistant to generate a response\n", + " last_message = messages[-1][\"content\"] if messages and isinstance(messages[-1], dict) else str(messages[-1])\n", + " response = assistant.generate_reply([{\"role\": \"user\", \"content\": last_message}])\n", + " if isinstance(response, str):\n", + " candidates.append(response)\n", + " elif isinstance(response, dict) and \"content\" in response:\n", + " candidates.append(response[\"content\"])\n", + " elif (\n", + " isinstance(response, list) and response and isinstance(response[-1], dict) and \"content\" in response[-1]\n", + " ):\n", + " candidates.append(response[-1][\"content\"])\n", + " else:\n", + " candidates.append(str(response))\n", + " except Exception as e:\n", + " logging.error(f\"Error generating candidate: {str(e)}\")\n", + " candidates.append(\"Failed to generate candidate.\")\n", + "\n", + " if not candidates:\n", + " logging.warning(\"No candidates were generated.\")\n", + "\n", + " return candidates\n", + "\n", + "\n", + "expansion_chain = generate_candidates" + ] + }, + { + "cell_type": "markdown", + "id": "a47c8161", + "metadata": {}, + "source": [ + "#### Candidate generation node\n", + "\n", + "We will package the candidate generation and reflection steps in the following \"expand\" node.\n", + "We do all the operations as a batch process to speed up execution." 
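+    "\n",
+    "If the reflection calls become a bottleneck, a hypothetical way to batch them across candidates is to wrap `reflection_chain` in a thread pool (the `expand` node below calls it in a simple loop):\n",
+    "\n",
+    "```python\n",
+    "from concurrent.futures import ThreadPoolExecutor\n",
+    "\n",
+    "def reflect_in_parallel(input_text: str, candidates: list, max_workers: int = 5) -> list:\n",
+    "    # Score all candidates concurrently instead of one at a time\n",
+    "    with ThreadPoolExecutor(max_workers=max_workers) as pool:\n",
+    "        futures = [\n",
+    "            pool.submit(reflection_chain, {\"input\": input_text, \"candidate\": candidate})\n",
+    "            for candidate in candidates\n",
+    "        ]\n",
+    "        return [future.result() for future in futures]\n",
+    "```\n",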
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "175afca7", + "metadata": {}, + "outputs": [], + "source": [ + "def expand(state: TreeState, config: Dict[str, Any]) -> dict:\n", + " root = state[\"root\"]\n", + " best_candidate: Node = root.best_child if root.children else root\n", + " messages = best_candidate.get_trajectory()\n", + "\n", + " # Generate N candidates using Autogen's generate_candidates function\n", + " new_candidates = generate_candidates(messages, config)\n", + "\n", + " # Reflect on each candidate using Autogen's AssistantAgent\n", + " reflections = []\n", + " for candidate in new_candidates:\n", + " reflection = reflection_chain({\"input\": state[\"input\"], \"candidate\": candidate})\n", + " reflections.append(reflection)\n", + "\n", + " # Grow tree\n", + " child_nodes = [\n", + " Node([{\"role\": \"assistant\", \"content\": candidate}], parent=best_candidate, reflection=reflection)\n", + " for candidate, reflection in zip(new_candidates, reflections)\n", + " ]\n", + " best_candidate.children.extend(child_nodes)\n", + "\n", + " # We have already extended the tree directly, so we just return the state\n", + " return state" + ] + }, + { + "cell_type": "markdown", + "id": "717b7b93", + "metadata": {}, + "source": [ + "## Create Tree\n", + "\n", + "With those two nodes defined, we are ready to define the tree. After each agent step, we have the option of finishing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e309ea9f", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, Dict, Literal\n", + "\n", + "\n", + "def should_loop(state: Dict[str, Any]) -> Literal[\"expand\", \"end\"]:\n", + " \"\"\"Determine whether to continue the tree search.\"\"\"\n", + " root = state[\"root\"]\n", + " if root.is_solved:\n", + " return \"end\"\n", + " if root.height > 5:\n", + " return \"end\"\n", + " return \"expand\"\n", + "\n", + "\n", + "def run_lats(input_query: str, max_iterations: int = 10):\n", + " import logging\n", + "\n", + " logging.basicConfig(level=logging.INFO)\n", + " logger = logging.getLogger(__name__)\n", + "\n", + " try:\n", + "\n", + " state = {\"input\": input_query, \"root\": None}\n", + " try:\n", + " state = generate_initial_response(state)\n", + " if not isinstance(state, dict) or \"root\" not in state or state[\"root\"] is None:\n", + " logger.error(\"Initial response generation failed or returned invalid state\")\n", + " return \"Failed to generate initial response.\"\n", + " logger.info(\"Initial response generated successfully\")\n", + " except Exception as e:\n", + " logger.error(f\"Error generating initial response: {str(e)}\", exc_info=True)\n", + " return \"Failed to generate initial response due to an unexpected error.\"\n", + "\n", + " for iteration in range(max_iterations):\n", + " action = should_loop(state)\n", + " if action == \"end\":\n", + " logger.info(f\"Search ended after {iteration + 1} iterations\")\n", + " break\n", + " try:\n", + " state = expand(\n", + " state,\n", + " {\n", + " \"N\": 5,\n", + " \"input_query\": input_query,\n", + " },\n", + " )\n", + " logger.info(f\"Completed iteration {iteration + 1}\")\n", + " except Exception as e:\n", + " logger.error(f\"Error during iteration {iteration + 1}: {str(e)}\", exc_info=True)\n", + " continue\n", + "\n", + " if not isinstance(state, dict) or \"root\" not in state or state[\"root\"] is None:\n", + " return \"No valid solution found due to an error in the search process.\"\n", + "\n", + " solution_node = 
state[\"root\"].get_best_solution()\n", + " best_trajectory = solution_node.get_trajectory(include_reflections=False)\n", + " if not best_trajectory:\n", + " return \"No solution found in the search process.\"\n", + "\n", + " result = (\n", + " best_trajectory[-1].get(\"content\") if isinstance(best_trajectory[-1], dict) else str(best_trajectory[-1])\n", + " )\n", + " logger.info(\"LATS search completed successfully\")\n", + " return result\n", + " except Exception as e:\n", + " logger.error(f\"An unexpected error occurred during LATS execution: {str(e)}\", exc_info=True)\n", + " return f\"An unexpected error occurred: {str(e)}\"" + ] + }, + { + "cell_type": "markdown", + "id": "e274e373", + "metadata": {}, + "source": [ + "Example usage:\n", + "\n", + "result = run_lats(\"Write a research report on deep learning.\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "aa719ff2", + "metadata": {}, + "source": [ + "\n", + "# Example usage of the LATS algorithm with Autogen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "683c0f2c", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "logging.basicConfig(level=logging.INFO, format=\"%(asctime)s - %(levelname)s - %(message)s\")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "\n", + "def run_lats_example(question):\n", + " try:\n", + " logger.info(f\"Processing question: {question}\")\n", + " result = run_lats(question)\n", + " logger.info(f\"LATS algorithm completed. Result: {result[:100]}...\") # Log first 100 chars of result\n", + " print(f\"Question: {question}\")\n", + " print(f\"Answer: {result}\")\n", + " except Exception as e:\n", + " logger.error(f\"An error occurred while processing the question: {str(e)}\", exc_info=True)\n", + " print(f\"An error occurred: {str(e)}\")\n", + " finally:\n", + " print(\"---\")" + ] + }, + { + "cell_type": "markdown", + "id": "a4ce778e", + "metadata": {}, + "source": [ + "# List of example questions\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60fa1f07", + "metadata": {}, + "outputs": [], + "source": [ + "questions = [\n", + " \"Explain how epigenetic modifications can influence gene expression across generations and the implications for evolution.\",\n", + " \"Discuss the challenges of grounding ethical theories in moral realism, especially in light of the is-ought problem introduced by Hume.\",\n", + " \"How does the Riemann Hypothesis relate to the distribution of prime numbers, and why is it significant in number theory?\",\n", + " \"Describe the challenges and theoretical underpinnings of unifying general relativity with quantum mechanics, particularly focusing on string theory and loop quantum gravity.\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "a0fed5fe", + "metadata": {}, + "source": [ + "# Run LATS algorithm for each question\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d1e5754", + "metadata": {}, + "outputs": [], + "source": [ + "for i, question in enumerate(questions, 1):\n", + " print(f\"\\nExample {i}:\")\n", + " run_lats_example(question)\n", + "\n", + "logger.info(\"All examples processed.\")" + ] + }, + { + "cell_type": "markdown", + "id": "af7254a5", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "Congrats on implementing LATS! This is a technique that can be reasonably fast and effective at solving complex agent tasks. A few notes that you probably observed above:\n", + "\n", + "1. 
While LATS is effective, the tree rollout process can require additional inference compute time. If you plan to integrate this into a production application, consider streaming intermediate steps to allow users to see the thought process and access intermediate results. Alternatively, you could use it to generate fine-tuning data to enhance single-shot accuracy and avoid lengthy rollouts. The cost of using LATS has significantly decreased since its initial proposal and is expected to continue decreasing.\n", + "\n", + "2. The effectiveness of the candidate selection process depends on the quality of the rewards generated. In this example, we exclusively use self-reflection as feedback, but if you have access to external feedback sources (such as code test execution), those should be incorporated as suggested above." + ] + }, + { + "cell_type": "markdown", + "id": "be01ff1e", + "metadata": {}, + "source": [ + "# \n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/samples/apps/autogen-studio/.gitignore b/samples/apps/autogen-studio/.gitignore index e1e3c9942ec..549ce16b6db 100644 --- a/samples/apps/autogen-studio/.gitignore +++ b/samples/apps/autogen-studio/.gitignore @@ -9,6 +9,9 @@ autogenstudio/web/workdir/* autogenstudio/web/ui/* autogenstudio/web/skills/user/* .release.sh +.nightly.sh + +notebooks/work_dir/* # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/samples/apps/autogen-studio/autogenstudio/chatmanager.py b/samples/apps/autogen-studio/autogenstudio/chatmanager.py index 84b85673f07..a91401e6663 100644 --- a/samples/apps/autogen-studio/autogenstudio/chatmanager.py +++ b/samples/apps/autogen-studio/autogenstudio/chatmanager.py @@ -1,7 +1,5 @@ import asyncio -import json import os -import time from datetime import datetime from queue import Queue from typing import Any, Dict, List, Optional, Tuple, Union @@ -9,12 +7,7 @@ import websockets from fastapi import WebSocket, WebSocketDisconnect -from .datamodel import Message, SocketMessage, Workflow -from .utils import ( - extract_successful_code_blocks, - get_modified_files, - summarize_chat_history, -) +from .datamodel import Message from .workflowmanager import WorkflowManager @@ -82,76 +75,12 @@ def chat( connection_id=connection_id, ) - workflow = Workflow.model_validate(workflow) - message_text = message.content.strip() + result_message: Message = workflow_manager.run(message=f"{message_text}", clear_history=False, history=history) - start_time = time.time() - workflow_manager.run(message=f"{message_text}", clear_history=False) - end_time = time.time() - - metadata = { - "messages": workflow_manager.agent_history, - "summary_method": workflow.summary_method, - "time": end_time - start_time, - "files": get_modified_files(start_time, end_time, source_dir=work_dir), - } - - output = self._generate_output(message_text, workflow_manager, workflow) - - output_message = Message( - user_id=message.user_id, - role="assistant", - content=output, - meta=json.dumps(metadata), - session_id=message.session_id, - ) - - return output_message - - def _generate_output( - self, - message_text: str, - workflow_manager: WorkflowManager, - workflow: 
Workflow, - ) -> str: - """ - Generates the output response based on the workflow configuration and agent history. - - :param message_text: The text of the incoming message. - :param flow: An instance of `WorkflowManager`. - :param flow_config: An instance of `AgentWorkFlowConfig`. - :return: The output response as a string. - """ - - output = "" - if workflow.summary_method == "last": - successful_code_blocks = extract_successful_code_blocks(workflow_manager.agent_history) - last_message = ( - workflow_manager.agent_history[-1]["message"]["content"] if workflow_manager.agent_history else "" - ) - successful_code_blocks = "\n\n".join(successful_code_blocks) - output = (last_message + "\n" + successful_code_blocks) if successful_code_blocks else last_message - elif workflow.summary_method == "llm": - client = workflow_manager.receiver.client - status_message = SocketMessage( - type="agent_status", - data={ - "status": "summarizing", - "message": "Summarizing agent dialogue", - }, - connection_id=workflow_manager.connection_id, - ) - self.send(status_message.dict()) - output = summarize_chat_history( - task=message_text, - messages=workflow_manager.agent_history, - client=client, - ) - - elif workflow.summary_method == "none": - output = "" - return output + result_message.user_id = message.user_id + result_message.session_id = message.session_id + return result_message class WebSocketConnectionManager: diff --git a/samples/apps/autogen-studio/autogenstudio/cli.py b/samples/apps/autogen-studio/autogenstudio/cli.py index 42642bcd68a..81fee799145 100644 --- a/samples/apps/autogen-studio/autogenstudio/cli.py +++ b/samples/apps/autogen-studio/autogenstudio/cli.py @@ -16,7 +16,7 @@ def ui( port: int = 8081, workers: int = 1, reload: Annotated[bool, typer.Option("--reload")] = False, - docs: bool = False, + docs: bool = True, appdir: str = None, database_uri: Optional[str] = None, ): @@ -48,6 +48,39 @@ def ui( ) +@app.command() +def serve( + workflow: str = "", + host: str = "127.0.0.1", + port: int = 8084, + workers: int = 1, + docs: bool = False, +): + """ + Serve an API Endpoint based on an AutoGen Studio workflow json file. + + Args: + workflow (str): Path to the workflow json file. + host (str, optional): Host to run the UI on. Defaults to 127.0.0.1 (localhost). + port (int, optional): Port to run the UI on. Defaults to 8081. + workers (int, optional): Number of workers to run the UI with. Defaults to 1. + reload (bool, optional): Whether to reload the UI on code changes. Defaults to False. + docs (bool, optional): Whether to generate API docs. Defaults to False. 
+ + """ + + os.environ["AUTOGENSTUDIO_API_DOCS"] = str(docs) + os.environ["AUTOGENSTUDIO_WORKFLOW_FILE"] = workflow + + uvicorn.run( + "autogenstudio.web.serve:app", + host=host, + port=port, + workers=workers, + reload=False, + ) + + @app.command() def version(): """ diff --git a/samples/apps/autogen-studio/autogenstudio/database/dbmanager.py b/samples/apps/autogen-studio/autogenstudio/database/dbmanager.py index 00d3714b63f..6a02a0a7038 100644 --- a/samples/apps/autogen-studio/autogenstudio/database/dbmanager.py +++ b/samples/apps/autogen-studio/autogenstudio/database/dbmanager.py @@ -1,3 +1,4 @@ +import threading from datetime import datetime from typing import Optional @@ -15,15 +16,23 @@ Skill, Workflow, WorkflowAgentLink, + WorkflowAgentType, ) from .utils import init_db_samples valid_link_types = ["agent_model", "agent_skill", "agent_agent", "workflow_agent"] +class WorkflowAgentMap(SQLModel): + agent: Agent + link: WorkflowAgentLink + + class DBManager: """A class to manage database operations""" + _init_lock = threading.Lock() # Class-level lock + def __init__(self, engine_uri: str): connection_args = {"check_same_thread": True} if "sqlite" in engine_uri else {} self.engine = create_engine(engine_uri, connect_args=connection_args) @@ -31,14 +40,15 @@ def __init__(self, engine_uri: str): def create_db_and_tables(self): """Create a new database and tables""" - try: - SQLModel.metadata.create_all(self.engine) + with self._init_lock: # Use the lock try: - init_db_samples(self) + SQLModel.metadata.create_all(self.engine) + try: + init_db_samples(self) + except Exception as e: + logger.info("Error while initializing database samples: " + str(e)) except Exception as e: - logger.info("Error while initializing database samples: " + str(e)) - except Exception as e: - logger.info("Error while creating database tables:" + str(e)) + logger.info("Error while creating database tables:" + str(e)) def upsert(self, model: SQLModel): """Create a new entity""" @@ -62,7 +72,7 @@ def upsert(self, model: SQLModel): session.refresh(model) except Exception as e: session.rollback() - logger.error("Error while upserting %s", e) + logger.error("Error while updating " + str(model_class.__name__) + ": " + str(e)) status = False response = Response( @@ -115,7 +125,7 @@ def get_items( session.rollback() status = False status_message = f"Error while fetching {model_class.__name__}" - logger.error("Error while getting %s: %s", model_class.__name__, e) + logger.error("Error while getting items: " + str(model_class.__name__) + " " + str(e)) response: Response = Response( message=status_message, @@ -157,16 +167,16 @@ def delete(self, model_class: SQLModel, filters: dict = None): status_message = f"{model_class.__name__} Deleted Successfully" else: print(f"Row with filters {filters} not found") - logger.info("Row with filters %s not found", filters) + logger.info("Row with filters + filters + not found") status_message = "Row not found" except exc.IntegrityError as e: session.rollback() - logger.error("Integrity ... Error while deleting: %s", e) + logger.error("Integrity ... Error while deleting: " + str(e)) status_message = f"The {model_class.__name__} is linked to another entity and cannot be deleted." 
status = False except Exception as e: session.rollback() - logger.error("Error while deleting: %s", e) + logger.error("Error while deleting: " + str(e)) status_message = f"Error while deleting: {e}" status = False response = Response( @@ -182,6 +192,7 @@ def get_linked_entities( primary_id: int, return_json: bool = False, agent_type: Optional[str] = None, + sequence_id: Optional[int] = None, ): """ Get all entities linked to the primary entity. @@ -217,19 +228,21 @@ def get_linked_entities( linked_entities = agent.agents elif link_type == "workflow_agent": linked_entities = session.exec( - select(Agent) - .join(WorkflowAgentLink) + select(WorkflowAgentLink, Agent) + .join(Agent, WorkflowAgentLink.agent_id == Agent.id) .where( WorkflowAgentLink.workflow_id == primary_id, - WorkflowAgentLink.agent_type == agent_type, ) ).all() + + linked_entities = [WorkflowAgentMap(agent=agent, link=link) for link, agent in linked_entities] + linked_entities = sorted(linked_entities, key=lambda x: x.link.sequence_id) # type: ignore except Exception as e: - logger.error("Error while getting linked entities: %s", e) + logger.error("Error while getting linked entities: " + str(e)) status_message = f"Error while getting linked entities: {e}" status = False if return_json: - linked_entities = [self._model_to_dict(row) for row in linked_entities] + linked_entities = [row.model_dump() for row in linked_entities] response = Response( message=status_message, @@ -245,6 +258,7 @@ def link( primary_id: int, secondary_id: int, agent_type: Optional[str] = None, + sequence_id: Optional[int] = None, ) -> Response: """ Link two entities together. @@ -357,6 +371,7 @@ def link( WorkflowAgentLink.workflow_id == primary_id, WorkflowAgentLink.agent_id == secondary_id, WorkflowAgentLink.agent_type == agent_type, + WorkflowAgentLink.sequence_id == sequence_id, ) ).first() if existing_link: @@ -373,6 +388,7 @@ def link( workflow_id=primary_id, agent_id=secondary_id, agent_type=agent_type, + sequence_id=sequence_id, ) session.add(workflow_agent_link) # add and commit the link @@ -385,7 +401,7 @@ def link( except Exception as e: session.rollback() - logger.error("Error while linking: %s", e) + logger.error("Error while linking: " + str(e)) status = False status_message = f"Error while linking due to an exception: {e}" @@ -402,6 +418,7 @@ def unlink( primary_id: int, secondary_id: int, agent_type: Optional[str] = None, + sequence_id: Optional[int] = 0, ) -> Response: """ Unlink two entities. 
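For context, the sequence_id column introduced in these hunks is what lets a single workflow hold an ordered chain of agents rather than only a sender/receiver pair. A minimal sketch of how the updated DBManager link API might be exercised is below; the engine URI, the integer ids, and the use of the "sequential" agent type for ordered members are illustrative assumptions rather than part of this changeset.

from autogenstudio.database.dbmanager import DBManager

dbmanager = DBManager(engine_uri="sqlite:///database.sqlite")
dbmanager.create_db_and_tables()  # creates the tables if they do not exist yet

# placeholder ids; the workflow and agents are assumed to already be in the database
workflow_id, user_proxy_id, research_agent_id, writer_agent_id = 2, 1, 3, 4

# the sender link is unchanged; ordered members carry an explicit sequence_id
dbmanager.link(link_type="workflow_agent", primary_id=workflow_id, secondary_id=user_proxy_id, agent_type="sender")
dbmanager.link(
    link_type="workflow_agent",
    primary_id=workflow_id,
    secondary_id=research_agent_id,
    agent_type="sequential",
    sequence_id=0,
)
dbmanager.link(
    link_type="workflow_agent",
    primary_id=workflow_id,
    secondary_id=writer_agent_id,
    agent_type="sequential",
    sequence_id=1,
)

# get_linked_entities now returns WorkflowAgentMap rows sorted by link.sequence_id
linked = dbmanager.get_linked_entities(link_type="workflow_agent", primary_id=workflow_id, return_json=True)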
@@ -417,6 +434,7 @@ def unlink( """ status = True status_message = "" + print("primary", primary_id, "secondary", secondary_id, "sequence", sequence_id, "agent_type", agent_type) if link_type not in valid_link_types: status = False @@ -452,6 +470,7 @@ def unlink( WorkflowAgentLink.workflow_id == primary_id, WorkflowAgentLink.agent_id == secondary_id, WorkflowAgentLink.agent_type == agent_type, + WorkflowAgentLink.sequence_id == sequence_id, ) ).first() @@ -465,7 +484,7 @@ def unlink( except Exception as e: session.rollback() - logger.error("Error while unlinking: %s", e) + logger.error("Error while unlinking: " + str(e)) status = False status_message = f"Error while unlinking due to an exception: {e}" diff --git a/samples/apps/autogen-studio/autogenstudio/database/utils.py b/samples/apps/autogen-studio/autogenstudio/database/utils.py index c14003b414c..189fa1baf8d 100644 --- a/samples/apps/autogen-studio/autogenstudio/database/utils.py +++ b/samples/apps/autogen-studio/autogenstudio/database/utils.py @@ -23,6 +23,7 @@ Skill, Workflow, WorkflowAgentLink, + WorkFlowType, ) @@ -71,9 +72,15 @@ def get_agent(agent_id): agent_dict["agents"] = [get_agent(agent.id) for agent in agent.agents] return agent_dict + agents = [] for link in workflow_agent_links: agent_dict = get_agent(link.agent_id) - workflow[str(link.agent_type.value)] = agent_dict + agents.append({"agent": agent_dict, "link": link.model_dump(mode="json")}) + # workflow[str(link.agent_type.value)] = agent_dict + if workflow["type"] == WorkFlowType.sequential.value: + # sort agents by sequence_id in link + agents = sorted(agents, key=lambda x: x["link"]["sequence_id"]) + workflow["agents"] = agents return workflow @@ -141,9 +148,13 @@ def init_db_samples(dbmanager: Any): logger.info("Database already initialized with Default and Travel Planning Workflows") return logger.info("Initializing database with Default and Travel Planning Workflows") + # models - gpt_4_model = Model( - model="gpt-4-1106-preview", description="OpenAI GPT-4 model", user_id="guestuser@gmail.com", api_type="open_ai" + google_gemini_model = Model( + model="gemini-1.5-pro-latest", + description="Google's Gemini model", + user_id="guestuser@gmail.com", + api_type="google", ) azure_model = Model( model="gpt4-turbo", @@ -160,61 +171,42 @@ def init_db_samples(dbmanager: Any): api_type="open_ai", ) - google_gemini_model = Model( - model="gemini-1.5-pro-latest", - description="Google's Gemini model", - user_id="guestuser@gmail.com", - api_type="google", + gpt_4_model = Model( + model="gpt-4-1106-preview", description="OpenAI GPT-4 model", user_id="guestuser@gmail.com", api_type="open_ai" ) # skills - + generate_pdf_skill = Skill( + name="generate_and_save_pdf", + description="Generate and save a pdf file based on the provided input sections.", + user_id="guestuser@gmail.com", + libraries=["requests", "fpdf", "PIL"], + content='import uuid\nimport requests\nfrom fpdf import FPDF\nfrom typing import List, Dict, Optional\nfrom pathlib import Path\nfrom PIL import Image, ImageDraw, ImageOps\nfrom io import BytesIO\n\ndef generate_and_save_pdf(\n sections: List[Dict[str, Optional[str]]], \n output_file: str = "report.pdf", \n report_title: str = "PDF Report"\n) -> None:\n """\n Function to generate a beautiful PDF report in A4 paper format. 
\n\n :param sections: A list of sections where each section is represented by a dictionary containing:\n - title: The title of the section.\n - level: The heading level (e.g., "title", "h1", "h2").\n - content: The content or body text of the section.\n - image: (Optional) The URL or local path to the image.\n :param output_file: The name of the output PDF file. (default is "report.pdf")\n :param report_title: The title of the report. (default is "PDF Report")\n :return: None\n """\n\n def get_image(image_url_or_path):\n if image_url_or_path.startswith("http://") or image_url_or_path.startswith("https://"):\n response = requests.get(image_url_or_path)\n if response.status_code == 200:\n return BytesIO(response.content)\n elif Path(image_url_or_path).is_file():\n return open(image_url_or_path, \'rb\')\n return None\n\n def add_rounded_corners(img, radius=6):\n mask = Image.new(\'L\', img.size, 0)\n draw = ImageDraw.Draw(mask)\n draw.rounded_rectangle([(0, 0), img.size], radius, fill=255)\n img = ImageOps.fit(img, mask.size, centering=(0.5, 0.5))\n img.putalpha(mask)\n return img\n\n class PDF(FPDF):\n def header(self):\n self.set_font("Arial", "B", 12)\n self.cell(0, 10, report_title, 0, 1, "C")\n \n def chapter_title(self, txt): \n self.set_font("Arial", "B", 12)\n self.cell(0, 10, txt, 0, 1, "L")\n self.ln(2)\n \n def chapter_body(self, body):\n self.set_font("Arial", "", 12)\n self.multi_cell(0, 10, body)\n self.ln()\n\n def add_image(self, img_data):\n img = Image.open(img_data)\n img = add_rounded_corners(img)\n img_path = Path(f"temp_{uuid.uuid4().hex}.png")\n img.save(img_path, format="PNG")\n self.image(str(img_path), x=None, y=None, w=190 if img.width > 190 else img.width)\n self.ln(10)\n img_path.unlink()\n\n pdf = PDF()\n pdf.add_page()\n font_size = {"title": 16, "h1": 14, "h2": 12, "body": 12}\n\n for section in sections:\n title, level, content, image = section.get("title", ""), section.get("level", "h1"), section.get("content", ""), section.get("image")\n pdf.set_font("Arial", "B" if level in font_size else "", font_size.get(level, font_size["body"]))\n pdf.chapter_title(title)\n\n if content: pdf.chapter_body(content)\n if image:\n img_data = get_image(image)\n if img_data:\n pdf.add_image(img_data)\n if isinstance(img_data, BytesIO):\n img_data.close()\n\n pdf.output(output_file)\n print(f"PDF report saved as {output_file}")\n\n# # Example usage\n# sections = [\n# {\n# "title": "Introduction - Early Life",\n# "level": "h1",\n# "image": "https://picsum.photos/536/354",\n# "content": ("Marie Curie was born on 7 November 1867 in Warsaw, Poland. "\n# "She was the youngest of five children. Both of her parents were teachers. "\n# "Her father was a math and physics instructor, and her mother was the head of a private school. "\n# "Marie\'s curiosity and brilliance were evident from an early age."),\n# },\n# {\n# "title": "Academic Accomplishments",\n# "level": "h2",\n# "content": ("Despite many obstacles, Marie Curie earned degrees in physics and mathematics from the University of Paris. "\n# "She conducted groundbreaking research on radioactivity, becoming the first woman to win a Nobel Prize. "\n# "Her achievements paved the way for future generations of scientists, particularly women in STEM fields."),\n# },\n# {\n# "title": "Major Discoveries",\n# "level": "h2",\n# "image": "https://picsum.photos/536/354",\n# "content": ("One of Marie Curie\'s most notable discoveries was that of radium and polonium, two radioactive elements. 
"\n# "Her meticulous work not only advanced scientific understanding but also had practical applications in medicine and industry."),\n# },\n# {\n# "title": "Conclusion - Legacy",\n# "level": "h1",\n# "content": ("Marie Curie\'s legacy lives on through her contributions to science, her role as a trailblazer for women in STEM, "\n# "and the ongoing impact of her discoveries on modern medicine and technology. "\n# "Her life and work remain an inspiration to many, demonstrating the power of perseverance and intellectual curiosity."),\n# },\n# ]\n\n# generate_and_save_pdf_report(sections, "my_report.pdf", "The Life of Marie Curie")', + ) generate_image_skill = Skill( - name="generate_images", + name="generate_and_save_images", + secrets=[{"secret": "OPENAI_API_KEY", "value": None}], + libraries=["openai"], description="Generate and save images based on a user's query.", content='\nfrom typing import List\nimport uuid\nimport requests # to perform HTTP requests\nfrom pathlib import Path\n\nfrom openai import OpenAI\n\n\ndef generate_and_save_images(query: str, image_size: str = "1024x1024") -> List[str]:\n """\n Function to paint, draw or illustrate images based on the users query or request. Generates images from a given query using OpenAI\'s DALL-E model and saves them to disk. Use the code below anytime there is a request to create an image.\n\n :param query: A natural language description of the image to be generated.\n :param image_size: The size of the image to be generated. (default is "1024x1024")\n :return: A list of filenames for the saved images.\n """\n\n client = OpenAI() # Initialize the OpenAI client\n response = client.images.generate(model="dall-e-3", prompt=query, n=1, size=image_size) # Generate images\n\n # List to store the file names of saved images\n saved_files = []\n\n # Check if the response is successful\n if response.data:\n for image_data in response.data:\n # Generate a random UUID as the file name\n file_name = str(uuid.uuid4()) + ".png" # Assuming the image is a PNG\n file_path = Path(file_name)\n\n img_url = image_data.url\n img_response = requests.get(img_url)\n if img_response.status_code == 200:\n # Write the binary content to a file\n with open(file_path, "wb") as img_file:\n img_file.write(img_response.content)\n print(f"Image saved to {file_path}")\n saved_files.append(str(file_path))\n else:\n print(f"Failed to download the image from {img_url}")\n else:\n print("No image data found in the response!")\n\n # Return the list of saved files\n return saved_files\n\n\n# Example usage of the function:\n# generate_and_save_images("A cute baby sea otter")\n', user_id="guestuser@gmail.com", ) # agents - user_proxy_config = AgentConfig( - name="user_proxy", - description="User Proxy Agent Configuration", - human_input_mode="NEVER", - max_consecutive_auto_reply=25, - system_message="You are a helpful assistant", - code_execution_config=CodeExecutionConfigTypes.local, - default_auto_reply="TERMINATE", - llm_config=False, - ) - user_proxy = Agent( - user_id="guestuser@gmail.com", type=AgentType.userproxy, config=user_proxy_config.model_dump(mode="json") - ) - - painter_assistant_config = AgentConfig( - name="default_assistant", - description="Assistant Agent", - human_input_mode="NEVER", - max_consecutive_auto_reply=25, - system_message=AssistantAgent.DEFAULT_SYSTEM_MESSAGE, - code_execution_config=CodeExecutionConfigTypes.none, - llm_config={}, - ) - painter_assistant = Agent( - user_id="guestuser@gmail.com", type=AgentType.assistant, 
config=painter_assistant_config.model_dump(mode="json") - ) planner_assistant_config = AgentConfig( name="planner_assistant", description="Assistant Agent", human_input_mode="NEVER", max_consecutive_auto_reply=25, - system_message="You are a helpful assistant that can suggest a travel plan for a user. You are the primary cordinator who will receive suggestions or advice from other agents (local_assistant, language_assistant). You must ensure that the finally plan integrates the suggestions from other agents or team members. YOUR FINAL RESPONSE MUST BE THE COMPLETE PLAN. When the plan is complete and all perspectives are integrated, you can respond with TERMINATE.", + system_message="You are a helpful assistant that can suggest a travel plan for a user and utilize any context information provided. You are the primary cordinator who will receive suggestions or advice from other agents (local_assistant, language_assistant). You must ensure that the finally plan integrates the suggestions from other agents or team members. YOUR FINAL RESPONSE MUST BE THE COMPLETE PLAN. When the plan is complete and all perspectives are integrated, you can respond with TERMINATE.", code_execution_config=CodeExecutionConfigTypes.none, llm_config={}, ) planner_assistant = Agent( - user_id="guestuser@gmail.com", type=AgentType.assistant, config=planner_assistant_config.model_dump(mode="json") + user_id="guestuser@gmail.com", + type=AgentType.assistant, + config=planner_assistant_config.model_dump(mode="json"), ) local_assistant_config = AgentConfig( @@ -222,7 +214,7 @@ def init_db_samples(dbmanager: Any): description="Local Assistant Agent", human_input_mode="NEVER", max_consecutive_auto_reply=25, - system_message="You are a local assistant that can suggest local activities or places to visit for a user. You can suggest local activities, places to visit, restaurants to eat at, etc. You can also provide information about the weather, local events, etc. You can provide information about the local area, but you cannot suggest a complete travel plan. You can only provide information about the local area.", + system_message="You are a local assistant that can suggest local activities or places to visit for a user and can utilize any context information provided. You can suggest local activities, places to visit, restaurants to eat at, etc. You can also provide information about the weather, local events, etc. You can provide information about the local area, but you cannot suggest a complete travel plan. 
You can only provide information about the local area.", code_execution_config=CodeExecutionConfigTypes.none, llm_config={}, ) @@ -245,7 +237,7 @@ def init_db_samples(dbmanager: Any): config=language_assistant_config.model_dump(mode="json"), ) - # group chat + # group chat agent travel_groupchat_config = AgentConfig( name="travel_groupchat", admin_name="groupchat", @@ -262,11 +254,48 @@ def init_db_samples(dbmanager: Any): user_id="guestuser@gmail.com", type=AgentType.groupchat, config=travel_groupchat_config.model_dump(mode="json") ) - # workflows - default_workflow = Workflow(name="Default Workflow", description="Default workflow", user_id="guestuser@gmail.com") + user_proxy_config = AgentConfig( + name="user_proxy", + description="User Proxy Agent Configuration", + human_input_mode="NEVER", + max_consecutive_auto_reply=25, + system_message="You are a helpful assistant", + code_execution_config=CodeExecutionConfigTypes.local, + default_auto_reply="TERMINATE", + llm_config=False, + ) + user_proxy = Agent( + user_id="guestuser@gmail.com", type=AgentType.userproxy, config=user_proxy_config.model_dump(mode="json") + ) + + default_assistant_config = AgentConfig( + name="default_assistant", + description="Assistant Agent", + human_input_mode="NEVER", + max_consecutive_auto_reply=25, + system_message=AssistantAgent.DEFAULT_SYSTEM_MESSAGE, + code_execution_config=CodeExecutionConfigTypes.none, + llm_config={}, + ) + default_assistant = Agent( + user_id="guestuser@gmail.com", type=AgentType.assistant, config=default_assistant_config.model_dump(mode="json") + ) + # workflows travel_workflow = Workflow( - name="Travel Planning Workflow", description="Travel workflow", user_id="guestuser@gmail.com" + name="Travel Planning Workflow", + description="Travel workflow", + user_id="guestuser@gmail.com", + sample_tasks=["Plan a 3 day trip to Hawaii Islands.", "Plan an eventful and exciting trip to Uzbeksitan."], + ) + default_workflow = Workflow( + name="Default Workflow", + description="Default workflow", + user_id="guestuser@gmail.com", + sample_tasks=[ + "paint a picture of a glass of ethiopian coffee, freshly brewed in a tall glass cup, on a table right in front of a lush green forest scenery", + "Plot the stock price of NVIDIA YTD.", + ], ) with Session(dbmanager.engine) as session: @@ -275,26 +304,27 @@ def init_db_samples(dbmanager: Any): session.add(azure_model) session.add(gpt_4_model) session.add(generate_image_skill) + session.add(generate_pdf_skill) session.add(user_proxy) - session.add(painter_assistant) + session.add(default_assistant) session.add(travel_groupchat_agent) session.add(planner_assistant) session.add(local_assistant) session.add(language_assistant) - session.add(default_workflow) session.add(travel_workflow) + session.add(default_workflow) session.commit() - dbmanager.link(link_type="agent_model", primary_id=painter_assistant.id, secondary_id=gpt_4_model.id) - dbmanager.link(link_type="agent_skill", primary_id=painter_assistant.id, secondary_id=generate_image_skill.id) + dbmanager.link(link_type="agent_model", primary_id=default_assistant.id, secondary_id=gpt_4_model.id) + dbmanager.link(link_type="agent_skill", primary_id=default_assistant.id, secondary_id=generate_image_skill.id) dbmanager.link( link_type="workflow_agent", primary_id=default_workflow.id, secondary_id=user_proxy.id, agent_type="sender" ) dbmanager.link( link_type="workflow_agent", primary_id=default_workflow.id, - secondary_id=painter_assistant.id, + secondary_id=default_assistant.id, agent_type="receiver", ) 
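The sample data above also illustrates the reshaped Skill fields: secrets is now a list of {"secret", "value"} records (get_skills_prompt exports any supplied values as environment variables) and libraries is a plain list of package names. A small sketch of registering a custom skill in the same style might look like the following; the skill body, the agent id, and the database URI are placeholders rather than part of this changeset.

from autogenstudio.database.dbmanager import DBManager
from autogenstudio.datamodel import Skill

dbmanager = DBManager(engine_uri="sqlite:///database.sqlite")

plot_stock_skill = Skill(
    name="plot_stock_price",
    description="Plot the year-to-date closing price of a stock ticker.",
    user_id="guestuser@gmail.com",
    libraries=["yfinance", "matplotlib"],
    secrets=[],  # e.g. [{"secret": "SOME_API_KEY", "value": None}] when a runtime secret is needed
    content="# placeholder skill implementation\n",
)
dbmanager.upsert(plot_stock_skill)

# attach the skill to an existing assistant agent; id 3 is a placeholder, and
# upsert refreshes the instance so plot_stock_skill.id is populated afterwards
dbmanager.link(link_type="agent_skill", primary_id=3, secondary_id=plot_stock_skill.id)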
diff --git a/samples/apps/autogen-studio/autogenstudio/datamodel.py b/samples/apps/autogen-studio/autogenstudio/datamodel.py index 3dbd46c357e..6c6dc567a80 100644 --- a/samples/apps/autogen-studio/autogenstudio/datamodel.py +++ b/samples/apps/autogen-studio/autogenstudio/datamodel.py @@ -20,6 +20,16 @@ # pylint: disable=protected-access +class MessageMeta(SQLModel, table=False): + task: Optional[str] = None + messages: Optional[List[Dict[str, Any]]] = None + summary_method: Optional[str] = "last" + files: Optional[List[dict]] = None + time: Optional[datetime] = None + log: Optional[List[dict]] = None + usage: Optional[List[dict]] = None + + class Message(SQLModel, table=True): __table_args__ = {"sqlite_autoincrement": True} id: Optional[int] = Field(default=None, primary_key=True) @@ -38,7 +48,7 @@ class Message(SQLModel, table=True): default=None, sa_column=Column(Integer, ForeignKey("session.id", ondelete="CASCADE")) ) connection_id: Optional[str] = None - meta: Optional[Dict] = Field(default={}, sa_column=Column(JSON)) + meta: Optional[Union[MessageMeta, dict]] = Field(default={}, sa_column=Column(JSON)) class Session(SQLModel, table=True): @@ -82,11 +92,12 @@ class Skill(SQLModel, table=True): sa_column=Column(DateTime(timezone=True), onupdate=func.now()), ) # pylint: disable=not-callable user_id: Optional[str] = None + version: Optional[str] = "0.0.1" name: str content: str description: Optional[str] = None - secrets: Optional[Dict] = Field(default={}, sa_column=Column(JSON)) - libraries: Optional[Dict] = Field(default={}, sa_column=Column(JSON)) + secrets: Optional[List[dict]] = Field(default_factory=list, sa_column=Column(JSON)) + libraries: Optional[List[str]] = Field(default_factory=list, sa_column=Column(JSON)) agents: List["Agent"] = Relationship(back_populates="skills", link_model=AgentSkillLink) @@ -97,7 +108,7 @@ class LLMConfig(SQLModel, table=False): temperature: float = 0 cache_seed: Optional[Union[int, None]] = None timeout: Optional[int] = None - max_tokens: Optional[int] = 1000 + max_tokens: Optional[int] = 2048 extra_body: Optional[dict] = None @@ -105,6 +116,10 @@ class ModelTypes(str, Enum): openai = "open_ai" google = "google" azure = "azure" + anthropic = "anthropic" + mistral = "mistral" + together = "together" + groq = "groq" class Model(SQLModel, table=True): @@ -119,6 +134,7 @@ class Model(SQLModel, table=True): sa_column=Column(DateTime(timezone=True), onupdate=func.now()), ) # pylint: disable=not-callable user_id: Optional[str] = None + version: Optional[str] = "0.0.1" model: str api_key: Optional[str] = None base_url: Optional[str] = None @@ -164,6 +180,7 @@ class WorkflowAgentType(str, Enum): sender = "sender" receiver = "receiver" planner = "planner" + sequential = "sequential" class WorkflowAgentLink(SQLModel, table=True): @@ -174,6 +191,7 @@ class WorkflowAgentLink(SQLModel, table=True): default=WorkflowAgentType.sender, sa_column=Column(SqlEnum(WorkflowAgentType), primary_key=True), ) + sequence_id: Optional[int] = Field(default=0, primary_key=True) class AgentLink(SQLModel, table=True): @@ -194,8 +212,9 @@ class Agent(SQLModel, table=True): sa_column=Column(DateTime(timezone=True), onupdate=func.now()), ) # pylint: disable=not-callable user_id: Optional[str] = None + version: Optional[str] = "0.0.1" type: AgentType = Field(default=AgentType.assistant, sa_column=Column(SqlEnum(AgentType))) - config: AgentConfig = Field(default_factory=AgentConfig, sa_column=Column(JSON)) + config: Union[AgentConfig, dict] = Field(default_factory=AgentConfig, 
sa_column=Column(JSON)) skills: List[Skill] = Relationship(back_populates="agents", link_model=AgentSkillLink) models: List[Model] = Relationship(back_populates="agents", link_model=AgentModelLink) workflows: List["Workflow"] = Relationship(link_model=WorkflowAgentLink, back_populates="agents") @@ -215,11 +234,12 @@ class Agent(SQLModel, table=True): secondaryjoin="Agent.id==AgentLink.agent_id", ), ) + task_instruction: Optional[str] = None class WorkFlowType(str, Enum): - twoagents = "twoagents" - groupchat = "groupchat" + autonomous = "autonomous" + sequential = "sequential" class WorkFlowSummaryMethod(str, Enum): @@ -240,14 +260,16 @@ class Workflow(SQLModel, table=True): sa_column=Column(DateTime(timezone=True), onupdate=func.now()), ) # pylint: disable=not-callable user_id: Optional[str] = None + version: Optional[str] = "0.0.1" name: str description: str agents: List[Agent] = Relationship(back_populates="workflows", link_model=WorkflowAgentLink) - type: WorkFlowType = Field(default=WorkFlowType.twoagents, sa_column=Column(SqlEnum(WorkFlowType))) + type: WorkFlowType = Field(default=WorkFlowType.autonomous, sa_column=Column(SqlEnum(WorkFlowType))) summary_method: Optional[WorkFlowSummaryMethod] = Field( default=WorkFlowSummaryMethod.last, sa_column=Column(SqlEnum(WorkFlowSummaryMethod)), ) + sample_tasks: Optional[List[str]] = Field(default_factory=list, sa_column=Column(JSON)) class Response(SQLModel): diff --git a/samples/apps/autogen-studio/autogenstudio/profiler.py b/samples/apps/autogen-studio/autogenstudio/profiler.py new file mode 100644 index 00000000000..679a56917e2 --- /dev/null +++ b/samples/apps/autogen-studio/autogenstudio/profiler.py @@ -0,0 +1,108 @@ +# metrics - agent_frequency, execution_count, tool_count, + +from typing import Dict, List, Optional + +from .datamodel import Message, MessageMeta + + +class Profiler: + """ + Profiler class to profile agent task runs and compute metrics + for performance evaluation. + """ + + def __init__(self): + self.metrics: List[Dict] = [] + + def _is_code(self, message: Message) -> bool: + """ + Check if the message contains code. + + :param message: The message instance to check. + :return: True if the message contains code, False otherwise. + """ + content = message.get("message").get("content").lower() + return "```" in content + + def _is_tool(self, message: Message) -> bool: + """ + Check if the message uses a tool. + + :param message: The message instance to check. + :return: True if the message uses a tool, False otherwise. + """ + content = message.get("message").get("content").lower() + return "from skills import" in content + + def _is_code_execution(self, message: Message) -> bool: + """ + Check if the message indicates code execution. + + :param message: The message instance to check. + :return: dict with is_code and status keys. + """ + content = message.get("message").get("content").lower() + if "exitcode:" in content: + status = "exitcode: 0" in content + return {"is_code": True, "status": status} + else: + return {"is_code": False, "status": False} + + def _is_terminate(self, message: Message) -> bool: + """ + Check if the message indicates termination. + + :param message: The message instance to check. + :return: True if the message indicates termination, False otherwise. + """ + content = message.get("message").get("content").lower() + return "terminate" in content + + def profile(self, agent_message: Message): + """ + Profile the agent task run and compute metrics. 
+ + :param agent: The agent instance that ran the task. + :param task: The task instance that was run. + """ + meta = MessageMeta(**agent_message.meta) + print(meta.log) + usage = meta.usage + messages = meta.messages + profile = [] + bar = [] + stats = {} + total_code_executed = 0 + success_code_executed = 0 + agents = [] + for message in messages: + agent = message.get("sender") + is_code = self._is_code(message) + is_tool = self._is_tool(message) + is_code_execution = self._is_code_execution(message) + total_code_executed += is_code_execution["is_code"] + success_code_executed += 1 if is_code_execution["status"] else 0 + + row = { + "agent": agent, + "tool_call": is_code, + "code_execution": is_code_execution, + "terminate": self._is_terminate(message), + } + bar_row = { + "agent": agent, + "tool_call": "tool call" if is_tool else "no tool call", + "code_execution": ( + "success" + if is_code_execution["status"] + else "failure" if is_code_execution["is_code"] else "no code" + ), + "message": 1, + } + profile.append(row) + bar.append(bar_row) + agents.append(agent) + code_success_rate = (success_code_executed / total_code_executed if total_code_executed > 0 else 0) * 100 + stats["code_success_rate"] = code_success_rate + stats["total_code_executed"] = total_code_executed + return {"profile": profile, "bar": bar, "stats": stats, "agents": set(agents), "usage": usage} diff --git a/samples/apps/autogen-studio/autogenstudio/utils/utils.py b/samples/apps/autogen-studio/autogenstudio/utils/utils.py index ed533ec3883..40cd549cb06 100644 --- a/samples/apps/autogen-studio/autogenstudio/utils/utils.py +++ b/samples/apps/autogen-studio/autogenstudio/utils/utils.py @@ -289,7 +289,7 @@ def init_app_folders(app_file_path: str) -> Dict[str, str]: return folders -def get_skills_from_prompt(skills: List[Skill], work_dir: str) -> str: +def get_skills_prompt(skills: List[Skill], work_dir: str) -> str: """ Create a prompt with the content of all skills and write the skills to a file named skills.py in the work_dir. @@ -306,10 +306,18 @@ def get_skills_from_prompt(skills: List[Skill], work_dir: str) -> str: """ prompt = "" # filename: skills.py + for skill in skills: + if not isinstance(skill, Skill): + skill = Skill(**skill) + if skill.secrets: + for secret in skill.secrets: + if secret.get("value") is not None: + os.environ[secret["secret"]] = secret["value"] prompt += f""" ##### Begin of {skill.name} ##### +from skills import {skill.name} # Import the function from skills.py {skill.content} @@ -317,15 +325,40 @@ def get_skills_from_prompt(skills: List[Skill], work_dir: str) -> str: """ + return instruction + prompt + + +def save_skills_to_file(skills: List[Skill], work_dir: str) -> None: + """ + Write the skills to a file named skills.py in the work_dir. + + :param skills: A dictionary skills + """ + + # TBD: Double check for duplicate skills? 
+ # check if work_dir exists if not os.path.exists(work_dir): os.makedirs(work_dir) + skills_content = "" + for skill in skills: + if not isinstance(skill, Skill): + skill = Skill(**skill) + + skills_content += f""" + +##### Begin of {skill.name} ##### + +{skill.content} + +#### End of {skill.name} #### + + """ + # overwrite skills.py in work_dir with open(os.path.join(work_dir, "skills.py"), "w", encoding="utf-8") as f: - f.write(prompt) - - return instruction + prompt + f.write(skills_content) def delete_files_in_folder(folders: Union[str, List[str]]) -> None: @@ -405,9 +438,23 @@ def test_model(model: Model): Test the model endpoint by sending a simple message to the model and returning the response. """ + print("Testing model", model) + sanitized_model = sanitize_model(model) client = OpenAIWrapper(config_list=[sanitized_model]) - response = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None) + response = client.create( + messages=[ + { + "role": "system", + "content": "You are a helpful assistant that can add numbers. ONLY RETURN THE RESULT.", + }, + { + "role": "user", + "content": "2+2=", + }, + ], + cache_seed=None, + ) return response.choices[0].message.content @@ -426,7 +473,11 @@ def load_code_execution_config(code_execution_type: CodeExecutionConfigTypes, wo if code_execution_type == CodeExecutionConfigTypes.local: executor = LocalCommandLineCodeExecutor(work_dir=work_dir) elif code_execution_type == CodeExecutionConfigTypes.docker: - executor = DockerCommandLineCodeExecutor(work_dir=work_dir) + try: + executor = DockerCommandLineCodeExecutor(work_dir=work_dir) + except Exception as e: + logger.error(f"Error initializing Docker executor: {e}") + return False elif code_execution_type == CodeExecutionConfigTypes.none: return False else: @@ -462,3 +513,61 @@ def summarize_chat_history(task: str, messages: List[Dict[str, str]], client: Mo ] response = client.create(messages=summarization_prompt, cache_seed=None) return response.choices[0].message.content + + +def get_autogen_log(db_path="logs.db"): + """ + Fetches data the autogen logs database. + Args: + dbname (str): Name of the database file. Defaults to "logs.db". + table (str): Name of the table to query. Defaults to "chat_completions". + + Returns: + list: A list of dictionaries, where each dictionary represents a row from the table. + """ + import json + import sqlite3 + + con = sqlite3.connect(db_path) + query = """ + SELECT + chat_completions.*, + agents.name AS agent_name + FROM + chat_completions + JOIN + agents ON chat_completions.wrapper_id = agents.wrapper_id + """ + cursor = con.execute(query) + rows = cursor.fetchall() + column_names = [description[0] for description in cursor.description] + data = [dict(zip(column_names, row)) for row in rows] + for row in data: + response = json.loads(row["response"]) + print(response) + total_tokens = response.get("usage", {}).get("total_tokens", 0) + row["total_tokens"] = total_tokens + con.close() + return data + + +def find_key_value(d, target_key): + """ + Recursively search for a key in a nested dictionary and return its value. 
+ """ + if d is None: + return None + + if isinstance(d, dict): + if target_key in d: + return d[target_key] + for k in d: + item = find_key_value(d[k], target_key) + if item is not None: + return item + elif isinstance(d, list): + for i in d: + item = find_key_value(i, target_key) + if item is not None: + return item + return None diff --git a/samples/apps/autogen-studio/autogenstudio/version.py b/samples/apps/autogen-studio/autogenstudio/version.py index bafe37f75b1..3d83da06d44 100644 --- a/samples/apps/autogen-studio/autogenstudio/version.py +++ b/samples/apps/autogen-studio/autogenstudio/version.py @@ -1,3 +1,3 @@ -VERSION = "0.0.56rc9" +VERSION = "0.1.4" __version__ = VERSION APP_NAME = "autogenstudio" diff --git a/samples/apps/autogen-studio/autogenstudio/web/app.py b/samples/apps/autogen-studio/autogenstudio/web/app.py index 76ab8139ebc..5926f6c64a1 100644 --- a/samples/apps/autogen-studio/autogenstudio/web/app.py +++ b/samples/apps/autogen-studio/autogenstudio/web/app.py @@ -4,7 +4,7 @@ import threading import traceback from contextlib import asynccontextmanager -from typing import Any +from typing import Any, Union from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware @@ -16,9 +16,11 @@ from ..database import workflow_from_id from ..database.dbmanager import DBManager from ..datamodel import Agent, Message, Model, Response, Session, Skill, Workflow +from ..profiler import Profiler from ..utils import check_and_cast_datetime_fields, init_app_folders, md5_hash, test_model from ..version import VERSION +profiler = Profiler() managers = {"chat": None} # manage calls to autogen # Create thread-safe queue for messages between api thread and autogen threads message_queue = queue.Queue() @@ -92,8 +94,15 @@ async def lifespan(app: FastAPI): allow_headers=["*"], ) - -api = FastAPI(root_path="/api") +show_docs = os.environ.get("AUTOGENSTUDIO_API_DOCS", "False").lower() == "true" +docs_url = "/docs" if show_docs else None +api = FastAPI( + root_path="/api", + title="AutoGen Studio API", + version=VERSION, + docs_url=docs_url, + description="AutoGen Studio is a low-code tool for building and testing multi-agent workflows using AutoGen.", +) # mount an api route such that the main route serves the ui and the /api app.mount("/api", api) @@ -293,6 +302,19 @@ async def get_workflow(workflow_id: int, user_id: str): return list_entity(Workflow, filters=filters) +@api.get("/workflows/export/{workflow_id}") +async def export_workflow(workflow_id: int, user_id: str): + """Export a user workflow""" + response = Response(message="Workflow exported successfully", status=True, data=None) + try: + workflow_details = workflow_from_id(workflow_id, dbmanager=dbmanager) + response.data = workflow_details + except Exception as ex_error: + response.message = "Error occurred while exporting workflow: " + str(ex_error) + response.status = False + return response.model_dump(mode="json") + + @api.post("/workflows") async def create_workflow(workflow: Workflow): """Create a new workflow""" @@ -317,6 +339,19 @@ async def link_workflow_agent(workflow_id: int, agent_id: int, agent_type: str): ) +@api.post("/workflows/link/agent/{workflow_id}/{agent_id}/{agent_type}/{sequence_id}") +async def link_workflow_agent_sequence(workflow_id: int, agent_id: int, agent_type: str, sequence_id: int): + """Link an agent to a workflow""" + print("Sequence ID: ", sequence_id) + return dbmanager.link( + link_type="workflow_agent", + primary_id=workflow_id, + secondary_id=agent_id, 
+ agent_type=agent_type, + sequence_id=sequence_id, + ) + + @api.delete("/workflows/link/agent/{workflow_id}/{agent_id}/{agent_type}") async def unlink_workflow_agent(workflow_id: int, agent_id: int, agent_type: str): """Unlink an agent from a workflow""" @@ -328,17 +363,47 @@ async def unlink_workflow_agent(workflow_id: int, agent_id: int, agent_type: str ) -@api.get("/workflows/link/agent/{workflow_id}/{agent_type}") -async def get_linked_workflow_agents(workflow_id: int, agent_type: str): +@api.delete("/workflows/link/agent/{workflow_id}/{agent_id}/{agent_type}/{sequence_id}") +async def unlink_workflow_agent_sequence(workflow_id: int, agent_id: int, agent_type: str, sequence_id: int): + """Unlink an agent from a workflow sequence""" + return dbmanager.unlink( + link_type="workflow_agent", + primary_id=workflow_id, + secondary_id=agent_id, + agent_type=agent_type, + sequence_id=sequence_id, + ) + + +@api.get("/workflows/link/agent/{workflow_id}") +async def get_linked_workflow_agents(workflow_id: int): """Get all agents linked to a workflow""" return dbmanager.get_linked_entities( link_type="workflow_agent", primary_id=workflow_id, - agent_type=agent_type, return_json=True, ) +@api.get("/profiler/{message_id}") +async def profile_agent_task_run(message_id: int): + """Profile an agent task run""" + try: + agent_message = dbmanager.get(Message, filters={"id": message_id}).data[0] + + profile = profiler.profile(agent_message) + return { + "status": True, + "message": "Agent task run profiled successfully", + "data": profile, + } + except Exception as ex_error: + return { + "status": False, + "message": "Error occurred while profiling agent task run: " + str(ex_error), + } + + @api.get("/sessions") async def list_sessions(user_id: str): """List all sessions for a user""" @@ -395,7 +460,6 @@ async def run_session_workflow(message: Message, session_id: int, workflow_id: i response: Response = dbmanager.upsert(agent_response) return response.model_dump(mode="json") except Exception as ex_error: - print(traceback.format_exc()) return { "status": False, "message": "Error occurred while processing message: " + str(ex_error), diff --git a/samples/apps/autogen-studio/autogenstudio/web/serve.py b/samples/apps/autogen-studio/autogenstudio/web/serve.py new file mode 100644 index 00000000000..462615378b8 --- /dev/null +++ b/samples/apps/autogen-studio/autogenstudio/web/serve.py @@ -0,0 +1,30 @@ +# loads a fast api api endpoint with a single endpoint that takes text query and return a response + +import json +import os + +from fastapi import FastAPI + +from ..datamodel import Response +from ..workflowmanager import WorkflowManager + +app = FastAPI() +workflow_file_path = os.environ.get("AUTOGENSTUDIO_WORKFLOW_FILE", None) + + +if workflow_file_path: + workflow_manager = WorkflowManager(workflow=workflow_file_path) +else: + raise ValueError("Workflow file must be specified") + + +@app.get("/predict/{task}") +async def predict(task: str): + response = Response(message="Task successfully completed", status=True, data=None) + try: + result_message = workflow_manager.run(message=task, clear_history=False) + response.data = result_message + except Exception as e: + response.message = str(e) + response.status = False + return response diff --git a/samples/apps/autogen-studio/autogenstudio/workflowmanager.py b/samples/apps/autogen-studio/autogenstudio/workflowmanager.py index 8b41caab428..f5065e85e5c 100644 --- a/samples/apps/autogen-studio/autogenstudio/workflowmanager.py +++ 
b/samples/apps/autogen-studio/autogenstudio/workflowmanager.py @@ -1,4 +1,6 @@ +import json import os +import time from datetime import datetime from typing import Any, Dict, List, Optional, Union @@ -7,20 +9,33 @@ from .datamodel import ( Agent, AgentType, + CodeExecutionConfigTypes, Message, SocketMessage, + Workflow, + WorkFlowSummaryMethod, + WorkFlowType, +) +from .utils import ( + clear_folder, + find_key_value, + get_modified_files, + get_skills_prompt, + load_code_execution_config, + sanitize_model, + save_skills_to_file, + summarize_chat_history, ) -from .utils import clear_folder, get_skills_from_prompt, load_code_execution_config, sanitize_model -class WorkflowManager: +class AutoWorkflowManager: """ - AutoGenWorkFlowManager class to load agents from a provided configuration and run a chat between them + WorkflowManager class to load agents from a provided configuration and run a chat between them. """ def __init__( self, - workflow: Dict, + workflow: Union[Dict, str], history: Optional[List[Message]] = None, work_dir: str = None, clear_work_dir: bool = True, @@ -28,27 +43,74 @@ def __init__( connection_id: Optional[str] = None, ) -> None: """ - Initializes the AutoGenFlow with agents specified in the config and optional - message history. + Initializes the WorkflowManager with agents specified in the config and optional message history. Args: - config: The configuration settings for the sender and receiver agents. - history: An optional list of previous messages to populate the agents' history. - + workflow (Union[Dict, str]): The workflow configuration. This can be a dictionary or a string which is a path to a JSON file. + history (Optional[List[Message]]): The message history. + work_dir (str): The working directory. + clear_work_dir (bool): If set to True, clears the working directory. + send_message_function (Optional[callable]): The function to send messages. + connection_id (Optional[str]): The connection identifier. """ + if isinstance(workflow, str): + if os.path.isfile(workflow): + with open(workflow, "r") as file: + self.workflow = json.load(file) + else: + raise FileNotFoundError(f"The file {workflow} does not exist.") + elif isinstance(workflow, dict): + self.workflow = workflow + else: + raise ValueError("The 'workflow' parameter should be either a dictionary or a valid JSON file path") + # TODO - improved typing for workflow + self.workflow_skills = [] self.send_message_function = send_message_function self.connection_id = connection_id self.work_dir = work_dir or "work_dir" + self.code_executor_pool = { + CodeExecutionConfigTypes.local: load_code_execution_config( + CodeExecutionConfigTypes.local, work_dir=self.work_dir + ), + CodeExecutionConfigTypes.docker: load_code_execution_config( + CodeExecutionConfigTypes.docker, work_dir=self.work_dir + ), + } if clear_work_dir: clear_folder(self.work_dir) - self.workflow = workflow - self.sender = self.load(workflow.get("sender")) - self.receiver = self.load(workflow.get("receiver")) self.agent_history = [] + self.history = history or [] + self.sender = None + self.receiver = None - if history: - self._populate_history(history) + def _run_workflow(self, message: str, history: Optional[List[Message]] = None, clear_history: bool = False) -> None: + """ + Runs the workflow based on the provided configuration. + + Args: + message: The initial message to start the chat. + history: A list of messages to populate the agents' history. + clear_history: If set to True, clears the chat history before initiating. 
+ + """ + for agent in self.workflow.get("agents", []): + if agent.get("link").get("agent_type") == "sender": + self.sender = self.load(agent.get("agent")) + elif agent.get("link").get("agent_type") == "receiver": + self.receiver = self.load(agent.get("agent")) + if self.sender and self.receiver: + # save all agent skills to skills.py + save_skills_to_file(self.workflow_skills, self.work_dir) + if history: + self._populate_history(history) + self.sender.initiate_chat( + self.receiver, + message=message, + clear_history=clear_history, + ) + else: + raise ValueError("Sender and receiver agents are not defined in the workflow configuration.") def _serialize_agent( self, @@ -184,13 +246,13 @@ def get_default_system_message(agent_type: str) -> str: config_list.append(sanitized_llm) agent.config.llm_config.config_list = config_list - agent.config.code_execution_config = load_code_execution_config( - agent.config.code_execution_config, work_dir=self.work_dir - ) + agent.config.code_execution_config = self.code_executor_pool.get(agent.config.code_execution_config, False) if skills: + for skill in skills: + self.workflow_skills.append(skill) skills_prompt = "" - skills_prompt = get_skills_from_prompt(skills, self.work_dir) + skills_prompt = get_skills_prompt(skills, self.work_dir) if agent.config.system_message: agent.config.system_message = agent.config.system_message + "\n\n" + skills_prompt else: @@ -241,7 +303,263 @@ def load(self, agent: Any) -> autogen.Agent: raise ValueError(f"Unknown agent type: {agent.type}") return agent - def run(self, message: str, clear_history: bool = False) -> None: + def _generate_output( + self, + message_text: str, + summary_method: str, + ) -> str: + """ + Generates the output response based on the workflow configuration and agent history. + + :param message_text: The text of the incoming message. + :param flow: An instance of `WorkflowManager`. + :param flow_config: An instance of `AgentWorkFlowConfig`. + :return: The output response as a string. 
+ """ + + output = "" + if summary_method == WorkFlowSummaryMethod.last: + (self.agent_history) + last_message = self.agent_history[-1]["message"]["content"] if self.agent_history else "" + output = last_message + elif summary_method == WorkFlowSummaryMethod.llm: + client = self.receiver.client + if self.connection_id: + status_message = SocketMessage( + type="agent_status", + data={ + "status": "summarizing", + "message": "Summarizing agent dialogue", + }, + connection_id=self.connection_id, + ) + self.send_message_function(status_message.model_dump(mode="json")) + output = summarize_chat_history( + task=message_text, + messages=self.agent_history, + client=client, + ) + + elif summary_method == "none": + output = "" + return output + + def _get_agent_usage(self, agent: autogen.Agent): + final_usage = [] + default_usage = {"total_cost": 0, "total_tokens": 0} + agent_usage = agent.client.total_usage_summary if agent.client else default_usage + agent_usage = { + "agent": agent.name, + "total_cost": find_key_value(agent_usage, "total_cost") or 0, + "total_tokens": find_key_value(agent_usage, "total_tokens") or 0, + } + final_usage.append(agent_usage) + + if type(agent) == ExtendedGroupChatManager: + print("groupchat found, processing", len(agent.groupchat.agents)) + for agent in agent.groupchat.agents: + agent_usage = agent.client.total_usage_summary if agent.client else default_usage or default_usage + agent_usage = { + "agent": agent.name, + "total_cost": find_key_value(agent_usage, "total_cost") or 0, + "total_tokens": find_key_value(agent_usage, "total_tokens") or 0, + } + final_usage.append(agent_usage) + return final_usage + + def _get_usage_summary(self): + sender_usage = self._get_agent_usage(self.sender) + receiver_usage = self._get_agent_usage(self.receiver) + + all_usage = [] + all_usage.extend(sender_usage) + all_usage.extend(receiver_usage) + # all_usage = [sender_usage, receiver_usage] + return all_usage + + def run(self, message: str, history: Optional[List[Message]] = None, clear_history: bool = False) -> Message: + """ + Initiates a chat between the sender and receiver agents with an initial message + and an option to clear the history. + + Args: + message: The initial message to start the chat. + clear_history: If set to True, clears the chat history before initiating. + """ + + start_time = time.time() + self._run_workflow(message=message, history=history, clear_history=clear_history) + end_time = time.time() + + output = self._generate_output(message, self.workflow.get("summary_method", "last")) + + usage = self._get_usage_summary() + # print("usage", usage) + + result_message = Message( + content=output, + role="assistant", + meta={ + "messages": self.agent_history, + "summary_method": self.workflow.get("summary_method", "last"), + "time": end_time - start_time, + "files": get_modified_files(start_time, end_time, source_dir=self.work_dir), + "usage": usage, + }, + ) + return result_message + + +class SequentialWorkflowManager: + """ + WorkflowManager class to load agents from a provided configuration and run a chat between them sequentially. + """ + + def __init__( + self, + workflow: Union[Dict, str], + history: Optional[List[Message]] = None, + work_dir: str = None, + clear_work_dir: bool = True, + send_message_function: Optional[callable] = None, + connection_id: Optional[str] = None, + ) -> None: + """ + Initializes the WorkflowManager with agents specified in the config and optional message history. 
+ + Args: + workflow (Union[Dict, str]): The workflow configuration. This can be a dictionary or a string which is a path to a JSON file. + history (Optional[List[Message]]): The message history. + work_dir (str): The working directory. + clear_work_dir (bool): If set to True, clears the working directory. + send_message_function (Optional[callable]): The function to send messages. + connection_id (Optional[str]): The connection identifier. + """ + if isinstance(workflow, str): + if os.path.isfile(workflow): + with open(workflow, "r") as file: + self.workflow = json.load(file) + else: + raise FileNotFoundError(f"The file {workflow} does not exist.") + elif isinstance(workflow, dict): + self.workflow = workflow + else: + raise ValueError("The 'workflow' parameter should be either a dictionary or a valid JSON file path") + + # TODO - improved typing for workflow + self.send_message_function = send_message_function + self.connection_id = connection_id + self.work_dir = work_dir or "work_dir" + if clear_work_dir: + clear_folder(self.work_dir) + self.agent_history = [] + self.history = history or [] + self.sender = None + self.receiver = None + self.model_client = None + + def _run_workflow(self, message: str, history: Optional[List[Message]] = None, clear_history: bool = False) -> None: + """ + Runs the workflow based on the provided configuration. + + Args: + message: The initial message to start the chat. + history: A list of messages to populate the agents' history. + clear_history: If set to True, clears the chat history before initiating. + + """ + user_proxy = { + "config": { + "name": "user_proxy", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "code_execution_config": "local", + "default_auto_reply": "TERMINATE", + "description": "User Proxy Agent Configuration", + "llm_config": False, + "type": "userproxy", + } + } + sequential_history = [] + for i, agent in enumerate(self.workflow.get("agents", [])): + workflow = Workflow( + name="agent workflow", type=WorkFlowType.autonomous, summary_method=WorkFlowSummaryMethod.llm + ) + workflow = workflow.model_dump(mode="json") + agent = agent.get("agent") + workflow["agents"] = [ + {"agent": user_proxy, "link": {"agent_type": "sender"}}, + {"agent": agent, "link": {"agent_type": "receiver"}}, + ] + + auto_workflow = AutoWorkflowManager( + workflow=workflow, + history=history, + work_dir=self.work_dir, + clear_work_dir=True, + send_message_function=self.send_message_function, + connection_id=self.connection_id, + ) + task_prompt = ( + f""" + Your primary instructions are as follows: + {agent.get("task_instruction")} + Context for addressing your task is below: + ======= + {str(sequential_history)} + ======= + Now address your task: + """ + if i > 0 + else message + ) + result = auto_workflow.run(message=task_prompt, clear_history=clear_history) + sequential_history.append(result.content) + self.model_client = auto_workflow.receiver.client + print(f"======== end of sequence === {i}============") + self.agent_history.extend(result.meta.get("messages", [])) + + def _generate_output( + self, + message_text: str, + summary_method: str, + ) -> str: + """ + Generates the output response based on the workflow configuration and agent history. + + :param message_text: The text of the incoming message. + :param flow: An instance of `WorkflowManager`. + :param flow_config: An instance of `AgentWorkFlowConfig`. + :return: The output response as a string. 
+ """ + + output = "" + if summary_method == WorkFlowSummaryMethod.last: + (self.agent_history) + last_message = self.agent_history[-1]["message"]["content"] if self.agent_history else "" + output = last_message + elif summary_method == WorkFlowSummaryMethod.llm: + if self.connection_id: + status_message = SocketMessage( + type="agent_status", + data={ + "status": "summarizing", + "message": "Summarizing agent dialogue", + }, + connection_id=self.connection_id, + ) + self.send_message_function(status_message.model_dump(mode="json")) + output = summarize_chat_history( + task=message_text, + messages=self.agent_history, + client=self.model_client, + ) + + elif summary_method == "none": + output = "" + return output + + def run(self, message: str, history: Optional[List[Message]] = None, clear_history: bool = False) -> Message: """ Initiates a chat between the sender and receiver agents with an initial message and an option to clear the history. @@ -250,11 +568,80 @@ def run(self, message: str, clear_history: bool = False) -> None: message: The initial message to start the chat. clear_history: If set to True, clears the chat history before initiating. """ - self.sender.initiate_chat( - self.receiver, - message=message, - clear_history=clear_history, + + start_time = time.time() + self._run_workflow(message=message, history=history, clear_history=clear_history) + end_time = time.time() + output = self._generate_output(message, self.workflow.get("summary_method", "last")) + + result_message = Message( + content=output, + role="assistant", + meta={ + "messages": self.agent_history, + "summary_method": self.workflow.get("summary_method", "last"), + "time": end_time - start_time, + "files": get_modified_files(start_time, end_time, source_dir=self.work_dir), + "task": message, + }, ) + return result_message + + +class WorkflowManager: + """ + WorkflowManager class to load agents from a provided configuration and run a chat between them. + """ + + def __new__( + self, + workflow: Union[Dict, str], + history: Optional[List[Message]] = None, + work_dir: str = None, + clear_work_dir: bool = True, + send_message_function: Optional[callable] = None, + connection_id: Optional[str] = None, + ) -> None: + """ + Initializes the WorkflowManager with agents specified in the config and optional message history. + + Args: + workflow (Union[Dict, str]): The workflow configuration. This can be a dictionary or a string which is a path to a JSON file. + history (Optional[List[Message]]): The message history. + work_dir (str): The working directory. + clear_work_dir (bool): If set to True, clears the working directory. + send_message_function (Optional[callable]): The function to send messages. + connection_id (Optional[str]): The connection identifier. 
+ """ + if isinstance(workflow, str): + if os.path.isfile(workflow): + with open(workflow, "r") as file: + self.workflow = json.load(file) + else: + raise FileNotFoundError(f"The file {workflow} does not exist.") + elif isinstance(workflow, dict): + self.workflow = workflow + else: + raise ValueError("The 'workflow' parameter should be either a dictionary or a valid JSON file path") + + if self.workflow.get("type") == WorkFlowType.autonomous.value: + return AutoWorkflowManager( + workflow=workflow, + history=history, + work_dir=work_dir, + clear_work_dir=clear_work_dir, + send_message_function=send_message_function, + connection_id=connection_id, + ) + elif self.workflow.get("type") == WorkFlowType.sequential.value: + return SequentialWorkflowManager( + workflow=workflow, + history=history, + work_dir=work_dir, + clear_work_dir=clear_work_dir, + send_message_function=send_message_function, + connection_id=connection_id, + ) class ExtendedConversableAgent(autogen.ConversableAgent): diff --git a/samples/apps/autogen-studio/frontend/gatsby-config.ts b/samples/apps/autogen-studio/frontend/gatsby-config.ts index 9644cfc0389..f66761c24be 100644 --- a/samples/apps/autogen-studio/frontend/gatsby-config.ts +++ b/samples/apps/autogen-studio/frontend/gatsby-config.ts @@ -1,5 +1,5 @@ import type { GatsbyConfig } from "gatsby"; -import fs from 'fs'; +import fs from "fs"; const envFile = `.env.${process.env.NODE_ENV}`; @@ -14,7 +14,7 @@ require("dotenv").config({ }); const config: GatsbyConfig = { - pathPrefix: `${process.env.PREFIX_PATH_VALUE}`, + pathPrefix: process.env.PREFIX_PATH_VALUE || '', siteMetadata: { title: `AutoGen Studio [Beta]`, description: `Build Multi-Agent Apps`, diff --git a/samples/apps/autogen-studio/frontend/package.json b/samples/apps/autogen-studio/frontend/package.json index da33db85014..7a06f09dac0 100644 --- a/samples/apps/autogen-studio/frontend/package.json +++ b/samples/apps/autogen-studio/frontend/package.json @@ -18,6 +18,7 @@ }, "dependencies": { "@ant-design/charts": "^1.3.6", + "@ant-design/plots": "^2.2.2", "@headlessui/react": "^1.7.16", "@heroicons/react": "^2.0.18", "@mdx-js/mdx": "^1.6.22", @@ -65,7 +66,6 @@ "@types/react-inner-image-zoom": "^3.0.0", "@types/react-resizable": "^3.0.2", "@types/uuid": "^9.0.8", - "gh-pages": "^4.0.0", "typescript": "^4.6.4" } } diff --git a/samples/apps/autogen-studio/frontend/src/components/atoms.tsx b/samples/apps/autogen-studio/frontend/src/components/atoms.tsx index c4c1368a123..a0864153f5a 100644 --- a/samples/apps/autogen-studio/frontend/src/components/atoms.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/atoms.tsx @@ -751,7 +751,7 @@ export const PdfViewer = ({ url }: { url: string }) => { data={url} type="application/pdf" width="100%" - height="450px" + style={{ height: "calc(90vh - 200px)" }} >

PDF cannot be displayed.

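For orientation, the WorkflowManager changes above turn the class into a factory: it inspects the workflow's "type" field and returns an AutoWorkflowManager for "autonomous" workflows or a SequentialWorkflowManager for "sequential" ones, and run() now returns a Message whose meta carries the agent history, summary method, elapsed time, and modified files. A minimal usage sketch, assuming the package is installed and a workflow JSON exported from the UI is available locally; the file path, work_dir, and task text below are illustrative, and attribute access on the returned Message mirrors how it is constructed above:

from autogenstudio import WorkflowManager

# The workflow argument can be a dict or a path to an exported JSON file;
# the factory dispatches on the workflow's "type" field.
workflow_manager = WorkflowManager(
    workflow="two_agent.json",  # illustrative path to an exported workflow
    work_dir="scratch",         # illustrative working directory
    clear_work_dir=True,
)

# run() executes the workflow and returns a Message; content holds the output
# and meta holds messages, summary_method, time, files, and task.
result = workflow_manager.run(
    message="What is the height of the Eiffel Tower? Do not write code, just respond to the question.",
    clear_history=False,
)
print(result.content)
print(result.meta["time"], result.meta["summary_method"])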
diff --git a/samples/apps/autogen-studio/frontend/src/components/types.ts b/samples/apps/autogen-studio/frontend/src/components/types.ts index eba39144602..ca51003e7ed 100644 --- a/samples/apps/autogen-studio/frontend/src/components/types.ts +++ b/samples/apps/autogen-studio/frontend/src/components/types.ts @@ -9,6 +9,8 @@ export interface IMessage { session_id?: number; connection_id?: string; workflow_id?: number; + meta?: any; + id?: number; } export interface IStatus { @@ -21,7 +23,7 @@ export interface IChatMessage { text: string; sender: "user" | "bot"; meta?: any; - msg_id: string; + id?: number; } export interface ILLMConfig { @@ -63,9 +65,9 @@ export interface IAgent { export interface IWorkflow { name: string; description: string; - sender: IAgent; - receiver: IAgent; - type: "twoagents" | "groupchat"; + sender?: IAgent; + receiver?: IAgent; + type?: "autonomous" | "sequential"; created_at?: string; updated_at?: string; summary_method?: "none" | "last" | "llm"; @@ -78,7 +80,7 @@ export interface IModelConfig { api_key?: string; api_version?: string; base_url?: string; - api_type?: "open_ai" | "azure" | "google"; + api_type?: "open_ai" | "azure" | "google" | "anthropic" | "mistral"; user_id?: string; created_at?: string; updated_at?: string; @@ -115,6 +117,8 @@ export interface IGalleryItem { export interface ISkill { name: string; content: string; + secrets?: any[]; + libraries?: string[]; id?: number; description?: string; user_id?: string; diff --git a/samples/apps/autogen-studio/frontend/src/components/utils.ts b/samples/apps/autogen-studio/frontend/src/components/utils.ts index 2264f5c66a2..e70590153a8 100644 --- a/samples/apps/autogen-studio/frontend/src/components/utils.ts +++ b/samples/apps/autogen-studio/frontend/src/components/utils.ts @@ -266,6 +266,18 @@ export const sampleModelConfig = (modelType: string = "open_ai") => { description: "Google Gemini Model model", }; + const anthropicConfig: IModelConfig = { + model: "claude-3-5-sonnet-20240620", + api_type: "anthropic", + description: "Claude 3.5 Sonnet model", + }; + + const mistralConfig: IModelConfig = { + model: "mistral", + api_type: "mistral", + description: "Mistral model", + }; + switch (modelType) { case "open_ai": return openaiConfig; @@ -273,6 +285,10 @@ export const sampleModelConfig = (modelType: string = "open_ai") => { return azureConfig; case "google": return googleConfig; + case "anthropic": + return anthropicConfig; + case "mistral": + return mistralConfig; default: return openaiConfig; } @@ -286,13 +302,36 @@ export const getRandomIntFromDateAndSalt = (salt: number = 43444) => { return randomInt; }; +export const getSampleWorkflow = (workflow_type: string = "autonomous") => { + const autonomousWorkflow: IWorkflow = { + name: "Default Chat Workflow", + description: "Autonomous Workflow", + type: "autonomous", + summary_method: "llm", + }; + const sequentialWorkflow: IWorkflow = { + name: "Default Sequential Workflow", + description: "Sequential Workflow", + type: "sequential", + summary_method: "llm", + }; + + if (workflow_type === "autonomous") { + return autonomousWorkflow; + } else if (workflow_type === "sequential") { + return sequentialWorkflow; + } else { + return autonomousWorkflow; + } +}; + export const sampleAgentConfig = (agent_type: string = "assistant") => { const llm_config: ILLMConfig = { config_list: [], temperature: 0.1, timeout: 600, cache_seed: null, - max_tokens: 1000, + max_tokens: 4000, }; const userProxyConfig: IAgentConfig = { @@ -357,90 +396,6 @@ export const 
sampleAgentConfig = (agent_type: string = "assistant") => { } }; -export const sampleWorkflowConfig = (type = "twoagents") => { - const llm_model_config: IModelConfig[] = []; - - const llm_config: ILLMConfig = { - config_list: llm_model_config, - temperature: 0.1, - timeout: 600, - cache_seed: null, - max_tokens: 1000, - }; - - const userProxyConfig: IAgentConfig = { - name: "userproxy", - human_input_mode: "NEVER", - max_consecutive_auto_reply: 15, - system_message: "You are a helpful assistant.", - default_auto_reply: "TERMINATE", - llm_config: false, - code_execution_config: "local", - }; - const userProxyFlowSpec: IAgent = { - type: "userproxy", - config: userProxyConfig, - }; - - const assistantConfig: IAgentConfig = { - name: "primary_assistant", - llm_config: llm_config, - human_input_mode: "NEVER", - max_consecutive_auto_reply: 8, - code_execution_config: "none", - system_message: - "You are a helpful AI assistant. Solve tasks using your coding and language skills. In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. If you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user. If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. 
Reply 'TERMINATE' in the end when everything is done.", - }; - - const assistantFlowSpec: IAgent = { - type: "assistant", - config: assistantConfig, - }; - - const workFlowConfig: IWorkflow = { - name: "Default Agent Workflow", - description: "Default Agent Workflow", - sender: userProxyFlowSpec, - receiver: assistantFlowSpec, - type: "twoagents", - }; - - const groupChatAssistantConfig = Object.assign( - { - admin_name: "groupchat_assistant", - messages: [], - max_round: 10, - speaker_selection_method: "auto", - allow_repeat_speaker: false, - description: "Group Chat Assistant", - }, - assistantConfig - ); - groupChatAssistantConfig.name = "groupchat_assistant"; - groupChatAssistantConfig.system_message = - "You are a helpful assistant skilled at cordinating a group of other assistants to solve a task. "; - - const groupChatFlowSpec: IAgent = { - type: "groupchat", - config: groupChatAssistantConfig, - }; - - const groupChatWorkFlowConfig: IWorkflow = { - name: "Default Group Workflow", - description: "Default Group Workflow", - sender: userProxyFlowSpec, - receiver: groupChatFlowSpec, - type: "groupchat", - }; - - if (type === "twoagents") { - return workFlowConfig; - } else if (type === "groupchat") { - return groupChatWorkFlowConfig; - } - return workFlowConfig; -}; - export const getSampleSkill = () => { const content = ` from typing import List @@ -495,7 +450,7 @@ def generate_and_save_images(query: str, image_size: str = "1024x1024") -> List[ `; const skill: ISkill = { - name: "generate_images", + name: "generate_and_save_images", description: "Generate and save images based on a user's query.", content: content, }; @@ -612,7 +567,7 @@ export const fetchVersion = () => { */ export const sanitizeConfig = ( data: any, - keys: string[] = ["api_key", "id", "created_at", "updated_at"] + keys: string[] = ["api_key", "id", "created_at", "updated_at", "secrets"] ): any => { if (Array.isArray(data)) { return data.map((item) => sanitizeConfig(item, keys)); diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/agents.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/agents.tsx index 8800ebfbdd3..6fcb505cc7e 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/agents.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/agents.tsx @@ -141,13 +141,9 @@ const AgentsView = ({}: any) => { icon: DocumentDuplicateIcon, onClick: (e: any) => { e.stopPropagation(); - let newAgent = { ...agent }; + let newAgent = { ...sanitizeConfig(agent) }; newAgent.config.name = `${agent.config.name}_copy`; - newAgent.user_id = user?.email; - newAgent.updated_at = new Date().toISOString(); - if (newAgent.id) { - delete newAgent.id; - } + console.log("newAgent", newAgent); setNewAgent(newAgent); setShowNewAgentModal(true); }, @@ -187,7 +183,7 @@ const AgentsView = ({}: any) => { aria-hidden="true" className="my-2 break-words" > - {" "} +
{agent.type}
{" "} {truncateText(agent.config.description || "", 70)}
{
{" "} - Configure an agent that can reused in your agent workflow{" "} - {selectedAgent?.config.name} + Configure an agent that can reused in your agent workflow . +
+ Tip: You can also create a Group of Agents ( New Agent - + GroupChat) which can have multiple agents in it. +
{agents && agents.length > 0 && (
diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/models.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/models.tsx index 2a3b0506d79..87ae739b62e 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/models.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/models.tsx @@ -6,7 +6,7 @@ import { PlusIcon, TrashIcon, } from "@heroicons/react/24/outline"; -import { Button, Dropdown, Input, MenuProps, Modal, message } from "antd"; +import { Dropdown, MenuProps, Modal, message } from "antd"; import * as React from "react"; import { IModelConfig, IStatus } from "../../types"; import { appContext } from "../../../hooks/provider"; @@ -17,14 +17,7 @@ import { timeAgo, truncateText, } from "../../utils"; -import { - BounceLoader, - Card, - CardHoverBar, - ControlRowView, - LoadingOverlay, -} from "../../atoms"; -import TextArea from "antd/es/input/TextArea"; +import { BounceLoader, Card, CardHoverBar, LoadingOverlay } from "../../atoms"; import { ModelConfigView } from "./utils/modelconfig"; const ModelsView = ({}: any) => { @@ -175,13 +168,8 @@ const ModelsView = ({}: any) => { icon: DocumentDuplicateIcon, onClick: (e: any) => { e.stopPropagation(); - let newModel = { ...model }; - newModel.model = `${model.model} Copy`; - newModel.user_id = user?.email; - newModel.updated_at = new Date().toISOString(); - if (newModel.id) { - delete newModel.id; - } + let newModel = { ...sanitizeConfig(model) }; + newModel.model = `${model.model}_copy`; setNewModel(newModel); setShowNewModelModal(true); }, diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/skills.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/skills.tsx index 77b50588dd2..7d3dfe75611 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/skills.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/skills.tsx @@ -1,12 +1,15 @@ import { ArrowDownTrayIcon, ArrowUpTrayIcon, + CodeBracketIcon, + CodeBracketSquareIcon, DocumentDuplicateIcon, InformationCircleIcon, + KeyIcon, PlusIcon, TrashIcon, } from "@heroicons/react/24/outline"; -import { Button, Input, Modal, message, MenuProps, Dropdown } from "antd"; +import { Button, Input, Modal, message, MenuProps, Dropdown, Tabs } from "antd"; import * as React from "react"; import { ISkill, IStatus } from "../../types"; import { appContext } from "../../../hooks/provider"; @@ -25,6 +28,8 @@ import { LoadingOverlay, MonacoEditor, } from "../../atoms"; +import { SkillSelector } from "./utils/selectors"; +import { SkillConfigView } from "./utils/skillconfig"; const SkillsView = ({}: any) => { const [loading, setLoading] = React.useState(false); @@ -109,38 +114,6 @@ const SkillsView = ({}: any) => { fetchJSON(listSkillsUrl, payLoad, onSuccess, onError); }; - const saveSkill = (skill: ISkill) => { - setError(null); - setLoading(true); - // const fetch; - skill.user_id = user?.email; - const payLoad = { - method: "POST", - headers: { - Accept: "application/json", - "Content-Type": "application/json", - }, - body: JSON.stringify(skill), - }; - - const onSuccess = (data: any) => { - if (data && data.status) { - message.success(data.message); - const updatedSkills = [data.data].concat(skills || []); - setSkills(updatedSkills); - } else { - message.error(data.message); - } - setLoading(false); - }; - const onError = (err: any) => { - setError(err); - message.error(err.message); - setLoading(false); - }; - 
fetchJSON(saveSkillsUrl, payLoad, onSuccess, onError); - }; - React.useEffect(() => { if (user) { // console.log("fetching messages", messages); @@ -173,12 +146,8 @@ const SkillsView = ({}: any) => { icon: DocumentDuplicateIcon, onClick: (e: any) => { e.stopPropagation(); - let newSkill = { ...skill }; - newSkill.name = `${skill.name} Copy`; - newSkill.user_id = user?.email; - if (newSkill.id) { - delete newSkill.id; - } + let newSkill = { ...sanitizeConfig(skill) }; + newSkill.name = `${skill.name}_copy`; setNewSkill(newSkill); setShowNewSkillModal(true); }, @@ -245,6 +214,15 @@ const SkillsView = ({}: any) => { }) => { const editorRef = React.useRef(null); const [localSkill, setLocalSkill] = React.useState(skill); + + const closeModal = () => { + setSkill(null); + setShowSkillModal(false); + if (handler) { + handler(skill); + } + }; + return ( { onCancel={() => { setShowSkillModal(false); }} - footer={[ - , - , - ]} + footer={[]} > {localSkill && ( -
-
- { - const updatedSkill = { ...localSkill, name: e.target.value }; - setLocalSkill(updatedSkill); - }} - /> -
- -
- -
-
+ )}
); @@ -367,17 +305,17 @@ const SkillsView = ({}: any) => { showSkillModal={showSkillModal} setShowSkillModal={setShowSkillModal} handler={(skill: ISkill) => { - saveSkill(skill); + fetchSkills(); }} /> { - saveSkill(skill); + fetchSkills(); }} /> diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/agentconfig.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/agentconfig.tsx index 17ab037485d..885a1e402d0 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/agentconfig.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/agentconfig.tsx @@ -63,7 +63,7 @@ export const AgentConfigView = ({ const llm_config: ILLMConfig = agent?.config?.llm_config || { config_list: [], temperature: 0.1, - max_tokens: 1000, + max_tokens: 4000, }; const createAgent = (agent: IAgent) => { diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/export.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/export.tsx new file mode 100644 index 00000000000..bb74bd0e2e3 --- /dev/null +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/export.tsx @@ -0,0 +1,207 @@ +import { Button, Modal, message } from "antd"; +import * as React from "react"; +import { IWorkflow } from "../../../types"; +import { ArrowDownTrayIcon } from "@heroicons/react/24/outline"; +import { + checkAndSanitizeInput, + fetchJSON, + getServerUrl, + sanitizeConfig, +} from "../../../utils"; +import { appContext } from "../../../../hooks/provider"; +import { CodeBlock } from "../../../atoms"; + +export const ExportWorkflowModal = ({ + workflow, + show, + setShow, +}: { + workflow: IWorkflow | null; + show: boolean; + setShow: (show: boolean) => void; +}) => { + const serverUrl = getServerUrl(); + const { user } = React.useContext(appContext); + + const [error, setError] = React.useState(null); + const [loading, setLoading] = React.useState(false); + const [workflowDetails, setWorkflowDetails] = React.useState(null); + + const getWorkflowCode = (workflow: IWorkflow) => { + const workflowCode = `from autogenstudio import WorkflowManager +# load workflow from exported json workflow file. +workflow_manager = WorkflowManager(workflow="path/to/your/workflow_.json") + +# run the workflow on a task +task_query = "What is the height of the Eiffel Tower?. Dont write code, just respond to the question." 
+workflow_manager.run(message=task_query)`; + return workflowCode; + }; + + const getCliWorkflowCode = (workflow: IWorkflow) => { + const workflowCode = `autogenstudio serve --workflow=workflow.json --port=5000 + `; + return workflowCode; + }; + + const getGunicornWorkflowCode = (workflow: IWorkflow) => { + const workflowCode = `gunicorn -w $((2 * $(getconf _NPROCESSORS_ONLN) + 1)) --timeout 12600 -k uvicorn.workers.UvicornWorker autogenstudio.web.app:app --bind `; + + return workflowCode; + }; + + const fetchWorkFlow = (workflow: IWorkflow) => { + setError(null); + setLoading(true); + // const fetch; + const payLoad = { + method: "GET", + headers: { + "Content-Type": "application/json", + }, + }; + const downloadWorkflowUrl = `${serverUrl}/workflows/export/${workflow.id}?user_id=${user?.email}`; + + const onSuccess = (data: any) => { + if (data && data.status) { + setWorkflowDetails(data.data); + console.log("workflow details", data.data); + + const sanitized_name = + checkAndSanitizeInput(workflow.name).sanitizedText || workflow.name; + const file_name = `workflow_${sanitized_name}.json`; + const workflowData = sanitizeConfig(data.data); + const file = new Blob([JSON.stringify(workflowData)], { + type: "application/json", + }); + const downloadUrl = URL.createObjectURL(file); + const a = document.createElement("a"); + a.href = downloadUrl; + a.download = file_name; + a.click(); + } else { + message.error(data.message); + } + setLoading(false); + }; + const onError = (err: any) => { + setError(err); + message.error(err.message); + setLoading(false); + }; + fetchJSON(downloadWorkflowUrl, payLoad, onSuccess, onError); + }; + + React.useEffect(() => { + if (workflow && workflow.id && show) { + // fetchWorkFlow(workflow.id); + console.log("workflow modal ... component loaded", workflow); + } + }, [show]); + + return ( + + Export Workflow + + {workflow?.name} + {" "} + + } + width={800} + open={show} + onOk={() => { + setShow(false); + }} + onCancel={() => { + setShow(false); + }} + footer={[]} + > +
+
+ {" "} + You can use the following steps to start integrating your workflow + into your application.{" "} +
+ {workflow && workflow.id && ( + <> +
+
+
Step 1
+
+ Download your workflow as a JSON file by clicking the button + below. +
+ +
+ +
+
+ +
+
Step 2
+
+ Copy the following code snippet and paste it into your + application to run your workflow on a task. +
+
+ +
+
+
+ +
+
+ Step 3 (Deploy) +
+
+ You can also deploy your workflow as an API endpoint using the + autogenstudio python CLI. +
+ +
+ + +
+ Note: this will start an endpoint on port 5000. You can change + the port with the --port argument. You can also scale this + using multiple workers (e.g., via an application server like + gunicorn) or wrap it in a docker container and deploy on a + cloud provider like Azure.
+ + +
+
+ + )} +
+
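Outside the UI, the same export flow can be scripted against the backend API. A rough sketch of Step 1 done programmatically, assuming the AutoGen Studio API is reachable at http://localhost:8081/api and that the requests package is available; the base URL, user email, and workflow id are placeholders, while the /workflows/export/{id} route and the {status, data} response shape mirror the fetch call above:

import json

import requests  # assumption: available in the client environment

BASE_URL = "http://localhost:8081/api"  # placeholder for the server URL used by the UI
WORKFLOW_ID = 18                        # placeholder workflow id
USER_EMAIL = "guestuser@gmail.com"      # placeholder user id

# Step 1: download the exported workflow (mirrors fetchWorkFlow above).
resp = requests.get(
    f"{BASE_URL}/workflows/export/{WORKFLOW_ID}", params={"user_id": USER_EMAIL}
)
payload = resp.json()
if payload.get("status"):
    with open("workflow.json", "w") as f:
        json.dump(payload["data"], f)

# Step 2: run it with WorkflowManager (see the snippet embedded in the modal above).
# Step 3: serve it as an API endpoint:
#   autogenstudio serve --workflow=workflow.json --port=5000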
+ ); +}; diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/modelconfig.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/modelconfig.tsx index 9f9c64500f7..c4a39956ba0 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/modelconfig.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/modelconfig.tsx @@ -23,18 +23,35 @@ const ModelTypeSelector = ({ value: "open_ai", description: "OpenAI or other endpoints that implement the OpenAI API", icon: , + hint: "In addition to OpenAI models, You can also use OSS models via tools like Ollama, vLLM, LMStudio etc. that provide OpenAI compatible endpoint.", }, { label: "Azure OpenAI", value: "azure", description: "Azure OpenAI endpoint", icon: , + hint: "Azure OpenAI endpoint", }, { label: "Gemini", value: "google", description: "Gemini", icon: , + hint: "Gemini", + }, + { + label: "Claude", + value: "anthropic", + description: "Anthropic Claude", + icon: , + hint: "Anthropic Claude models", + }, + { + label: "Mistral", + value: "mistral", + description: "Mistral", + icon: , + hint: "Mistral models", }, ]; @@ -46,7 +63,7 @@ const ModelTypeSelector = ({ return (
  • { - setSelectedHint(modelType.value); + setSelectedHint(modelType.hint); }} role="listitem" key={"modeltype" + i} @@ -78,13 +95,6 @@ const ModelTypeSelector = ({ ); }); - const hints: any = { - open_ai: - "In addition to OpenAI models, You can also use OSS models via tools like Ollama, vLLM, LMStudio etc. that provide OpenAI compatible endpoint.", - azure: "Azure OpenAI endpoint", - google: "Gemini", - }; - const [selectedHint, setSelectedHint] = React.useState("open_ai"); return ( @@ -94,7 +104,7 @@ const ModelTypeSelector = ({
    - {hints[selectedHint]} + {selectedHint}
    ); diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/selectors.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/selectors.tsx index f3c9cb08aa9..79275fe4ba2 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/selectors.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/selectors.tsx @@ -3,10 +3,10 @@ import { IAgent, IModelConfig, ISkill, IWorkflow } from "../../../types"; import { Card } from "../../../atoms"; import { fetchJSON, + getSampleWorkflow, getServerUrl, obscureString, sampleAgentConfig, - sampleWorkflowConfig, truncateText, } from "../../../utils"; import { @@ -19,6 +19,8 @@ import { theme, } from "antd"; import { + ArrowLongRightIcon, + ChatBubbleLeftRightIcon, CodeBracketSquareIcon, ExclamationTriangleIcon, InformationCircleIcon, @@ -354,7 +356,7 @@ export const AgentTypeSelector = ({ return ( <> -
    Select Agent Type
    +
    Select Agent Type
      {agentTypeRows}
    ); @@ -370,10 +372,18 @@ export const WorkflowTypeSelector = ({ const iconClass = "h-6 w-6 inline-block "; const workflowTypes = [ { - label: "Default", - value: "default", - description: <> Includes a sender and receiver. , - icon: , + label: "Autonomous (Chat)", + value: "autonomous", + description: + "Includes an initiator and receiver. The initiator is typically a user proxy agent, while the receiver could be any agent type (assistant or groupchat", + icon: , + }, + { + label: "Sequential", + value: "sequential", + description: + " Includes a list of agents in a given order. Each agent should have an nstruction and will summarize and pass on the results of their work to the next agent", + icon: , }, ]; const [seletectedWorkflowType, setSelectedWorkflowType] = React.useState< @@ -390,7 +400,7 @@ export const WorkflowTypeSelector = ({ onClick={() => { setSelectedWorkflowType(workflowType.value); if (workflow) { - const sampleWorkflow = sampleWorkflowConfig(); + const sampleWorkflow = getSampleWorkflow(workflowType.value); setWorkflow(sampleWorkflow); } }} @@ -398,9 +408,12 @@ export const WorkflowTypeSelector = ({
    {" "}
    {workflowType.icon}
    - + {" "} - {workflowType.description} + {truncateText(workflowType.description, 60)}
    @@ -410,7 +423,7 @@ export const WorkflowTypeSelector = ({ return ( <> -
    Select Workflow Type
    +
    Select Workflow Type
      {workflowTypeRows}
    ); @@ -964,17 +977,15 @@ export const ModelSelector = ({ agentId }: { agentId: number }) => { }; export const WorkflowAgentSelector = ({ - workflowId, + workflow, }: { - workflowId: number; + workflow: IWorkflow; }) => { const [error, setError] = useState(null); const [loading, setLoading] = useState(false); const [agents, setAgents] = useState([]); - const [senderTargetAgents, setSenderTargetAgents] = useState([]); - const [receiverTargetAgents, setReceiverTargetAgents] = useState( - [] - ); + const [linkedAgents, setLinkedAgents] = useState([]); + const serverUrl = getServerUrl(); const { user } = React.useContext(appContext); @@ -1008,11 +1019,8 @@ export const WorkflowAgentSelector = ({ fetchJSON(listAgentsUrl, payLoad, onSuccess, onError); }; - const fetchTargetAgents = ( - setTarget: (arg0: any) => void, - agentType: string - ) => { - const listTargetAgentsUrl = `${serverUrl}/workflows/link/agent/${workflowId}/${agentType}`; + const fetchLinkedAgents = () => { + const listTargetAgentsUrl = `${serverUrl}/workflows/link/agent/${workflow.id}`; setError(null); setLoading(true); const payLoad = { @@ -1024,7 +1032,8 @@ export const WorkflowAgentSelector = ({ const onSuccess = (data: any) => { if (data && data.status) { - setTarget(data.data); + setLinkedAgents(data.data); + console.log("linked agents", data.data); } else { message.error(data.message); } @@ -1042,7 +1051,8 @@ export const WorkflowAgentSelector = ({ const linkWorkflowAgent = ( workflowId: number, targetAgentId: number, - agentType: string + agentType: string, + sequenceId?: number ) => { setError(null); setLoading(true); @@ -1052,15 +1062,15 @@ export const WorkflowAgentSelector = ({ "Content-Type": "application/json", }, }; - const linkAgentUrl = `${serverUrl}/workflows/link/agent/${workflowId}/${targetAgentId}/${agentType}`; + let linkAgentUrl; + linkAgentUrl = `${serverUrl}/workflows/link/agent/${workflowId}/${targetAgentId}/${agentType}`; + if (agentType === "sequential") { + linkAgentUrl = `${serverUrl}/workflows/link/agent/${workflowId}/${targetAgentId}/${agentType}/${sequenceId}`; + } const onSuccess = (data: any) => { if (data && data.status) { message.success(data.message); - if (agentType === "sender") { - fetchTargetAgents(setSenderTargetAgents, "sender"); - } else { - fetchTargetAgents(setReceiverTargetAgents, "receiver"); - } + fetchLinkedAgents(); } else { message.error(data.message); } @@ -1076,11 +1086,7 @@ export const WorkflowAgentSelector = ({ fetchJSON(linkAgentUrl, payLoad, onSuccess, onError); }; - const unlinkWorkflowAgent = ( - workflowId: number, - targetAgentId: number, - agentType: string - ) => { + const unlinkWorkflowAgent = (agent: IAgent, link: any) => { setError(null); setLoading(true); const payLoad = { @@ -1089,16 +1095,17 @@ export const WorkflowAgentSelector = ({ "Content-Type": "application/json", }, }; - const unlinkAgentUrl = `${serverUrl}/workflows/link/agent/${workflowId}/${targetAgentId}/${agentType}`; + + let unlinkAgentUrl; + unlinkAgentUrl = `${serverUrl}/workflows/link/agent/${workflow.id}/${agent.id}/${link.agent_type}`; + if (link.agent_type === "sequential") { + unlinkAgentUrl = `${serverUrl}/workflows/link/agent/${workflow.id}/${agent.id}/${link.agent_type}/${link.sequence_id}`; + } const onSuccess = (data: any) => { if (data && data.status) { message.success(data.message); - if (agentType === "sender") { - fetchTargetAgents(setSenderTargetAgents, "sender"); - } else { - fetchTargetAgents(setReceiverTargetAgents, "receiver"); - } + fetchLinkedAgents(); } else { 
message.error(data.message); } @@ -1116,8 +1123,7 @@ export const WorkflowAgentSelector = ({ useEffect(() => { fetchAgents(); - fetchTargetAgents(setSenderTargetAgents, "sender"); - fetchTargetAgents(setReceiverTargetAgents, "receiver"); + fetchLinkedAgents(); }, []); const agentItems: MenuProps["items"] = @@ -1145,9 +1151,26 @@ export const WorkflowAgentSelector = ({ const receiverOnclick: MenuProps["onClick"] = ({ key }) => { const selectedIndex = parseInt(key.toString()); let selectedAgent = agents[selectedIndex]; + if (selectedAgent && selectedAgent.id && workflow.id) { + linkWorkflowAgent(workflow.id, selectedAgent.id, "receiver"); + } + }; - if (selectedAgent && selectedAgent.id) { - linkWorkflowAgent(workflowId, selectedAgent.id, "receiver"); + const sequenceOnclick: MenuProps["onClick"] = ({ key }) => { + const selectedIndex = parseInt(key.toString()); + let selectedAgent = agents[selectedIndex]; + + if (selectedAgent && selectedAgent.id && workflow.id) { + const sequenceId = + linkedAgents.length > 0 + ? linkedAgents[linkedAgents.length - 1].link.sequence_id + 1 + : 0; + linkWorkflowAgent( + workflow.id, + selectedAgent.id, + "sequential", + sequenceId + ); } }; @@ -1155,18 +1178,16 @@ export const WorkflowAgentSelector = ({ const selectedIndex = parseInt(key.toString()); let selectedAgent = agents[selectedIndex]; - if (selectedAgent && selectedAgent.id) { - linkWorkflowAgent(workflowId, selectedAgent.id, "sender"); + if (selectedAgent && selectedAgent.id && workflow.id) { + linkWorkflowAgent(workflow.id, selectedAgent.id, "sender"); } }; - const handleRemoveAgent = (index: number, agentType: string) => { - const targetAgents = - agentType === "sender" ? senderTargetAgents : receiverTargetAgents; - const agent = targetAgents[index]; - if (agent && agent.id) { - unlinkWorkflowAgent(workflowId, agent.id, agentType); + const handleRemoveAgent = (agent: IAgent, link: any) => { + if (agent && agent.id && workflow.id) { + unlinkWorkflowAgent(agent, link); } + console.log(link); }; const { token } = useToken(); @@ -1185,9 +1206,11 @@ export const WorkflowAgentSelector = ({ onClick: MenuProps["onClick"]; agentType: string; }) => { - const targetAgents = - agentType === "sender" ? senderTargetAgents : receiverTargetAgents; - const agentButtons = targetAgents.map((agent, i) => { + const targetAgents = linkedAgents.filter( + (row) => row.link.agent_type === agentType + ); + + const agentButtons = targetAgents.map(({ agent, link }, i) => { const tooltipText = ( <>
    {agent.config.name}
    @@ -1197,32 +1220,38 @@ export const WorkflowAgentSelector = ({ ); return ( -
    -
    +
    +
    {" "} - -
    {agent.config.name}
    {" "} -
    -
    { - e.stopPropagation(); // Prevent opening the modal to edit - handleRemoveAgent(i, agentType); - }} - className="ml-1 text-primary hover:text-accent duration-300" - > - +
    + {" "} + +
    {agent.config.name}
    {" "} +
    +
    { + e.stopPropagation(); // Prevent opening the modal to edit + handleRemoveAgent(agent, link); + }} + className="ml-1 text-primary hover:text-accent duration-300" + > + +
    + {link.agent_type === "sequential" && + i !== targetAgents.length - 1 && ( +
    + {" "} +
    + )}
    ); }); return ( -
    +
    {(!targetAgents || targetAgents.length === 0) && (
    @@ -1239,13 +1268,14 @@ export const WorkflowAgentSelector = ({ remove current agents and add new ones.
    )} - {targetAgents && targetAgents.length < 1 && ( + {((targetAgents.length < 1 && agentType !== "sequential") || + agentType === "sequential") && ( ( -
    +
    {React.cloneElement(menu as React.ReactElement, { style: { boxShadow: "none" }, })} @@ -1268,7 +1298,7 @@ export const WorkflowAgentSelector = ({
    {" "}
    Add {title} @@ -1282,33 +1312,48 @@ export const WorkflowAgentSelector = ({ return (
    -
    -
    -

    - Initiator{" "} - - - -

    -
      - -
    + {workflow.type === "autonomous" && ( +
    +
    +

    + Initiator{" "} + + + +

    +
      + +
    +
    +
    +

    Receiver

    +
      + +
    +
    -
    -

    Receiver

    + )} + + {workflow.type === "sequential" && ( +
    +
    Agents
    -
    + )}
    ); }; diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/skillconfig.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/skillconfig.tsx new file mode 100644 index 00000000000..8a7a2f24c7f --- /dev/null +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/skillconfig.tsx @@ -0,0 +1,295 @@ +import React from "react"; +import { fetchJSON, getServerUrl, sampleModelConfig } from "../../../utils"; +import { Button, Input, message, theme } from "antd"; +import { + CpuChipIcon, + EyeIcon, + EyeSlashIcon, + InformationCircleIcon, + PlusIcon, + TrashIcon, +} from "@heroicons/react/24/outline"; +import { ISkill, IStatus } from "../../../types"; +import { Card, ControlRowView, MonacoEditor } from "../../../atoms"; +import TextArea from "antd/es/input/TextArea"; +import { appContext } from "../../../../hooks/provider"; + +const SecretsEditor = ({ + secrets = [], + updateSkillConfig, +}: { + secrets: { secret: string; value: string }[]; + updateSkillConfig: (key: string, value: any) => void; +}) => { + const [editingIndex, setEditingIndex] = React.useState(null); + const [newSecret, setNewSecret] = React.useState(""); + const [newValue, setNewValue] = React.useState(""); + + const toggleEditing = (index: number) => { + setEditingIndex(editingIndex === index ? null : index); + }; + + const handleAddSecret = () => { + if (newSecret && newValue) { + const updatedSecrets = [ + ...secrets, + { secret: newSecret, value: newValue }, + ]; + updateSkillConfig("secrets", updatedSecrets); + setNewSecret(""); + setNewValue(""); + } + }; + + const handleRemoveSecret = (index: number) => { + const updatedSecrets = secrets.filter((_, i) => i !== index); + updateSkillConfig("secrets", updatedSecrets); + }; + + const handleSecretChange = (index: number, key: string, value: string) => { + const updatedSecrets = secrets.map((item, i) => + i === index ? { ...item, [key]: value } : item + ); + updateSkillConfig("secrets", updatedSecrets); + }; + + return ( +
    + {secrets && ( +
    + {secrets.map((secret, index) => ( +
    + + handleSecretChange(index, "secret", e.target.value) + } + className="flex-1" + /> + + handleSecretChange(index, "value", e.target.value) + } + className="flex-1" + /> +
    + ))} +
    + )} +
    + setNewSecret(e.target.value)} + className="flex-1" + /> + setNewValue(e.target.value)} + className="flex-1" + /> +
    +
    + ); +}; + +export const SkillConfigView = ({ + skill, + setSkill, + close, +}: { + skill: ISkill; + setSkill: (newModel: ISkill) => void; + close: () => void; +}) => { + const [loading, setLoading] = React.useState(false); + + const serverUrl = getServerUrl(); + const { user } = React.useContext(appContext); + const testModelUrl = `${serverUrl}/skills/test`; + const createSkillUrl = `${serverUrl}/skills`; + + const createSkill = (skill: ISkill) => { + setLoading(true); + skill.user_id = user?.email; + const payLoad = { + method: "POST", + headers: { + Accept: "application/json", + "Content-Type": "application/json", + }, + body: JSON.stringify(skill), + }; + + const onSuccess = (data: any) => { + if (data && data.status) { + message.success(data.message); + setSkill(data.data); + } else { + message.error(data.message); + } + setLoading(false); + }; + const onError = (err: any) => { + message.error(err.message); + setLoading(false); + }; + const onFinal = () => { + setLoading(false); + setControlChanged(false); + }; + fetchJSON(createSkillUrl, payLoad, onSuccess, onError, onFinal); + }; + + const [controlChanged, setControlChanged] = React.useState(false); + + const updateSkillConfig = (key: string, value: string) => { + if (skill) { + const updatedSkill = { ...skill, [key]: value }; + // setSkill(updatedModelConfig); + setSkill(updatedSkill); + } + setControlChanged(true); + }; + + const hasChanged = !controlChanged && skill.id !== undefined; + const editorRef = React.useRef(null); + + return ( +
    + {skill && ( +
    +
    +
    +
    +
    + { + updateSkillConfig("content", value); + }} + /> +
    +
    +
    +
    +
    + { + updateSkillConfig("name", e.target.value); + }} + /> + } + /> + + { + updateSkillConfig("description", e.target.value); + }} + /> + } + /> + + + } + /> +
    +
    +
    +
    + )} + +
    + {/* */} + + {!hasChanged && ( + + )} + + +
    +
    + ); +}; diff --git a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/workflowconfig.tsx b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/workflowconfig.tsx index 8b97f311862..c42c2e9be30 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/workflowconfig.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/builder/utils/workflowconfig.tsx @@ -165,7 +165,7 @@ export const WorkflowViewConfig = ({ )} {workflow?.id && (
    { }; const workflowTypes: MenuProps["items"] = [ - { - key: "twoagents", - label: ( -
    - {" "} - - Two Agents -
    - ), - }, - { - key: "groupchat", - label: ( -
    - - Group Chat -
    - ), - }, - { - type: "divider", - }, + // { + // key: "twoagents", + // label: ( + //
    + // {" "} + // + // Two Agents + //
    + // ), + // }, + // { + // key: "groupchat", + // label: ( + //
    + // + // Group Chat + //
    + // ), + // }, + // { + // type: "divider", + // }, { key: "uploadworkflow", label: ( @@ -328,7 +341,7 @@ const WorkflowView = ({}: any) => { uploadWorkflow(); return; } - showWorkflow(sampleWorkflowConfig(key)); + showWorkflow(sampleWorkflow); }; return ( @@ -352,6 +365,12 @@ const WorkflowView = ({}: any) => { }} /> + +
    @@ -366,7 +385,7 @@ const WorkflowView = ({}: any) => { placement="bottomRight" trigger={["click"]} onClick={() => { - showWorkflow(sampleWorkflowConfig()); + showWorkflow(sampleWorkflow); }} > diff --git a/samples/apps/autogen-studio/frontend/src/components/views/playground/chatbox.tsx b/samples/apps/autogen-studio/frontend/src/components/views/playground/chatbox.tsx index 737269ea262..e0b10f3b115 100644 --- a/samples/apps/autogen-studio/frontend/src/components/views/playground/chatbox.tsx +++ b/samples/apps/autogen-studio/frontend/src/components/views/playground/chatbox.tsx @@ -1,15 +1,18 @@ import { ArrowPathIcon, + ChatBubbleLeftRightIcon, Cog6ToothIcon, DocumentDuplicateIcon, ExclamationTriangleIcon, InformationCircleIcon, PaperAirplaneIcon, + SignalSlashIcon, } from "@heroicons/react/24/outline"; import { Button, Dropdown, MenuProps, + Tabs, message as ToastMessage, Tooltip, message, @@ -33,6 +36,7 @@ import { MarkdownView, } from "../../atoms"; import { useConfigStore } from "../../../hooks/store"; +import ProfilerView from "./utils/profiler"; let socketMsgs: any[] = []; @@ -93,7 +97,7 @@ const ChatBox = ({ const messages = useConfigStore((state) => state.messages); const setMessages = useConfigStore((state) => state.setMessages); - const parseMessage = (message: any) => { + const parseMessage = (message: IMessage) => { let meta; try { meta = JSON.parse(message.meta); @@ -104,7 +108,7 @@ const ChatBox = ({ text: message.content, sender: message.role === "user" ? "user" : "bot", meta: meta, - msg_id: message.msg_id, + id: message.id, }; return msg; }; @@ -237,10 +241,45 @@ const ChatBox = ({ />
    )} - {message.meta && ( -
    - -
    + {message.meta && !isUser && ( + <> + {" "} + + {" "} + + Agent Messages + + ), + key: "1", + children: ( +
    + +
    + ), + }, + { + label: ( +
    + {" "} + {" "} + Profiler +
    + ), + key: "2", + children: ( +
    + +
    + ), + }, + ]} + /> + )}
    @@ -409,7 +448,6 @@ const ChatBox = ({ const userMessage: IChatMessage = { text: query, sender: "user", - msg_id: guid(), }; messageHolder.push(userMessage); setMessages(messageHolder); diff --git a/samples/apps/autogen-studio/frontend/src/components/views/playground/utils/charts/bar.tsx b/samples/apps/autogen-studio/frontend/src/components/views/playground/utils/charts/bar.tsx new file mode 100644 index 00000000000..09f6443b71a --- /dev/null +++ b/samples/apps/autogen-studio/frontend/src/components/views/playground/utils/charts/bar.tsx @@ -0,0 +1,58 @@ +import { Bar, Line } from "@ant-design/plots"; +import * as React from "react"; +import { IStatus } from "../../../../types"; + +const BarChartViewer = ({ data }: { data: any | null }) => { + const [error, setError] = React.useState({ + status: true, + message: "All good", + }); + + const [loading, setLoading] = React.useState(false); + + const config = { + data: data.bar, + xField: "agent", + yField: "message", + colorField: "tool_call", + stack: true, + axis: { + y: { labelFormatter: "" }, + x: { + labelSpacing: 4, + }, + }, + style: { + radiusTopLeft: 10, + radiusTopRight: 10, + }, + height: 60 * data.agents.length, + }; + + const config_code_exec = Object.assign({}, config); + config_code_exec.colorField = "code_execution"; + + return ( +
    +
    +
    +
    +
    + {" "} + Tool Call +
    + +
    +
    +
    + {" "} + Code Execution Status +
    + +
    +
    +
    +
    + ); +}; +export default BarChartViewer; diff --git a/samples/apps/autogen-studio/frontend/src/components/views/playground/utils/profiler.tsx b/samples/apps/autogen-studio/frontend/src/components/views/playground/utils/profiler.tsx new file mode 100644 index 00000000000..a2c3e747e75 --- /dev/null +++ b/samples/apps/autogen-studio/frontend/src/components/views/playground/utils/profiler.tsx @@ -0,0 +1,125 @@ +import { Tooltip, message } from "antd"; +import * as React from "react"; +import { IStatus, IChatMessage } from "../../../types"; +import { fetchJSON, getServerUrl } from "../../../utils"; +import { appContext } from "../../../../hooks/provider"; +import { InformationCircleIcon } from "@heroicons/react/24/outline"; + +const BarChartViewer = React.lazy(() => import("./charts/bar")); + +const ProfilerView = ({ + agentMessage, +}: { + agentMessage: IChatMessage | null; +}) => { + const [error, setError] = React.useState({ + status: true, + message: "All good", + }); + + const [loading, setLoading] = React.useState(false); + const [profile, setProfile] = React.useState(null); + + const { user } = React.useContext(appContext); + const serverUrl = getServerUrl(); + + const fetchProfile = (messageId: number) => { + const profilerUrl = `${serverUrl}/profiler/${messageId}?user_id=${user?.email}`; + setError(null); + setLoading(true); + const payLoad = { + method: "GET", + headers: { + "Content-Type": "application/json", + }, + }; + + const onSuccess = (data: any) => { + console.log(data); + if (data && data.status) { + setProfile(data.data); + setTimeout(() => { + // scroll parent to bottom + const parent = document.getElementById("chatbox"); + if (parent) { + parent.scrollTop = parent.scrollHeight; + } + }, 4000); + } else { + message.error(data.message); + } + setLoading(false); + }; + const onError = (err: any) => { + setError(err); + message.error(err.message); + setLoading(false); + }; + fetchJSON(profilerUrl, payLoad, onSuccess, onError); + }; + + React.useEffect(() => { + if (user && agentMessage && agentMessage.id) { + fetchProfile(agentMessage.id); + } + }, []); + + const UsageViewer = ({ usage }: { usage: any }) => { + const usageRows = usage.map((usage: any, index: number) => ( +
    + {(usage.total_cost != 0 || usage.total_tokens != 0) && ( + <> +
    + {usage.agent} +
    +
    + {usage.total_tokens && usage.total_tokens != 0 && ( +
    +
    + {usage.total_tokens} +
    +
    tokens
    +
    + )} + {usage.total_cost && usage.total_cost != 0 && ( +
    +
    + {usage.total_cost?.toFixed(3)} +
    +
    USD
    +
    + )} +
    + + )} +
    + )); + return ( +
    {usage && usageRows}
    + ); + }; + + return ( +
    +
    + {/* {profile && } */} + {profile && } + +
    +
    + LLM Costs + + + +
    + {profile && profile.usage && } +
    +
    +
    + ); +}; +export default ProfilerView; diff --git a/samples/apps/autogen-studio/frontend/src/styles/global.css b/samples/apps/autogen-studio/frontend/src/styles/global.css index a46b3712ded..2d1517497de 100644 --- a/samples/apps/autogen-studio/frontend/src/styles/global.css +++ b/samples/apps/autogen-studio/frontend/src/styles/global.css @@ -289,7 +289,8 @@ iiz__zoom-img { .ant-modal-footer { @apply border-secondary !important; } -.ant-btn { +.ant-btn, +.ant-btn:hover { @apply text-primary !important; } :where(.ant-btn).ant-btn-compact-item.ant-btn-primary:not([disabled]) @@ -333,6 +334,12 @@ iiz__zoom-img { @apply bg-primary text-primary !important; } +.ant-dropdown-menu { + max-height: 250px; + overflow: auto; + @apply scroll !important; +} + /* .ant-radio-input::before { @apply bg-primary !important; } */ diff --git a/samples/apps/autogen-studio/notebooks/agent_spec.json b/samples/apps/autogen-studio/notebooks/agent_spec.json deleted file mode 100644 index 72d1e21ef1a..00000000000 --- a/samples/apps/autogen-studio/notebooks/agent_spec.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "name": "General Agent Workflow", - "description": "A general agent workflow", - "sender": { - "type": "userproxy", - "config": { - "name": "userproxy", - "human_input_mode": "NEVER", - "max_consecutive_auto_reply": 5, - "system_message": "", - "llm_config": false, - "code_execution_config": { - "work_dir": null, - "use_docker": false - } - } - }, - "receiver": { - "type": "assistant", - "config": { - "name": "primary_assistant", - "llm_config": { - "config_list": [ - { - "model": "gpt-4-1106-preview" - } - ], - "temperature": 0.1, - "timeout": 600, - "cache_seed": 42 - }, - "human_input_mode": "NEVER", - "max_consecutive_auto_reply": 8, - "system_message": "You are a helpful assistant that can use available functions when needed to solve problems. At each point, do your best to determine if the user's request has been addressed. IF THE REQUEST HAS NOT BEEN ADDRESSED, RESPOND WITH CODE TO ADDRESS IT. IF A FAILURE OCCURRED (e.g., due to a missing library) AND SOME ADDITIONAL CODE WAS WRITTEN (e.g. code to install the library), ENSURE THAT THE ORIGINAL CODE TO ADDRESS THE TASK STILL GETS EXECUTED. If the request HAS been addressed, respond with a summary of the result. The summary must be written as a coherent helpful response to the user request e.g. 'Sure, here is result to your request ' or 'The tallest mountain in Africa is ..' etc. The summary MUST end with the word TERMINATE. If the user request is pleasantry or greeting, you should respond with a pleasantry or greeting and TERMINATE." 
- } - }, - "type": "twoagents" -} diff --git a/samples/apps/autogen-studio/notebooks/groupchat_spec.json b/samples/apps/autogen-studio/notebooks/groupchat_spec.json deleted file mode 100644 index 21cced7135b..00000000000 --- a/samples/apps/autogen-studio/notebooks/groupchat_spec.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "name": "Travel Agent Group Chat Workflow", - "description": "A group chat workflow", - "type": "groupchat", - "sender": { - "type": "userproxy", - "config": { - "name": "userproxy", - "human_input_mode": "NEVER", - "max_consecutive_auto_reply": 5, - "system_message": "", - "llm_config": false, - "code_execution_config": { - "work_dir": null, - "use_docker": false - } - } - }, - "receiver": { - "type": "groupchat", - "description": "A group chat workflow", - "config": { - "name": "group_chat_manager", - "llm_config": { - "config_list": [ - { - "model": "gpt-4-1106-preview" - } - ], - "temperature": 0.1, - "timeout": 600, - "cache_seed": 42 - }, - "human_input_mode": "NEVER", - "system_message": "Group chat manager" - }, - "groupchat_config": { - "admin_name": "Admin", - "max_round": 10, - "speaker_selection_method": "auto", - - "agents": [ - { - "type": "assistant", - "config": { - "name": "primary_assistant", - "llm_config": { - "config_list": [ - { - "model": "gpt-4-1106-preview" - } - ], - "temperature": 0.1, - "timeout": 600, - "cache_seed": 42 - }, - "human_input_mode": "NEVER", - "max_consecutive_auto_reply": 8, - "system_message": "You are a helpful assistant that can suggest a travel itinerary for a user. You are the primary cordinator who will receive suggestions or advice from other agents (local_assistant, language_assistant). You must ensure that the finally plan integrates the suggestions from other agents or team members. YOUR FINAL RESPONSE MUST BE THE COMPLETE PLAN that ends with the word TERMINATE. " - } - }, - { - "type": "assistant", - "config": { - "name": "local_assistant", - "llm_config": { - "config_list": [ - { - "model": "gpt-4-1106-preview" - } - ], - "temperature": 0.1, - "timeout": 600, - "cache_seed": 42 - }, - "human_input_mode": "NEVER", - "max_consecutive_auto_reply": 8, - "system_message": "You are a helpful assistant that can review travel plans, providing critical feedback on how the trip can be enriched for enjoyment of the local culture. If the plan already includes local experiences, you can mention that the plan is satisfactory, with rationale." - } - }, - { - "type": "assistant", - "config": { - "name": "language_assistant", - "llm_config": { - "config_list": [ - { - "model": "gpt-4-1106-preview" - } - ], - "temperature": 0.1, - "timeout": 600, - "cache_seed": 42 - }, - "human_input_mode": "NEVER", - "max_consecutive_auto_reply": 8, - "system_message": "You are a helpful assistant that can review travel plans, providing feedback on important/critical tips about how best to address language or communication challenges for the given destination. If the plan already includes language tips, you can mention that the plan is satisfactory, with rationale." 
- } - } - ] - } - } -} diff --git a/samples/apps/autogen-studio/notebooks/travel_groupchat.json b/samples/apps/autogen-studio/notebooks/travel_groupchat.json new file mode 100644 index 00000000000..efcff443ef1 --- /dev/null +++ b/samples/apps/autogen-studio/notebooks/travel_groupchat.json @@ -0,0 +1,273 @@ +{ + "user_id": "guestuser@gmail.com", + "name": "Travel Planning Workflow", + "type": "autonomous", + "sample_tasks": [ + "Plan a 3 day trip to Hawaii Islands.", + "Plan an eventful and exciting trip to Uzbeksitan." + ], + "version": "0.0.1", + "description": "Travel workflow", + "summary_method": "llm", + "agents": [ + { + "agent": { + "version": "0.0.1", + "config": { + "name": "user_proxy", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a helpful assistant", + "is_termination_msg": null, + "code_execution_config": "local", + "default_auto_reply": "TERMINATE", + "description": "User Proxy Agent Configuration", + "llm_config": false, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "userproxy", + "task_instruction": null, + "skills": [], + "models": [], + "agents": [] + }, + "link": { + "agent_id": 52, + "workflow_id": 18, + "agent_type": "sender", + "sequence_id": 0 + } + }, + { + "agent": { + "version": "0.0.1", + "config": { + "name": "travel_groupchat", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a group chat manager", + "is_termination_msg": null, + "code_execution_config": "none", + "default_auto_reply": "TERMINATE", + "description": "Group Chat Agent Configuration", + "llm_config": { + "config_list": [ + { + "api_type": "open_ai", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "temperature": 0, + "cache_seed": null, + "timeout": null, + "max_tokens": 2048, + "extra_body": null + }, + "admin_name": "groupchat", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "groupchat", + "task_instruction": null, + "skills": [], + "models": [ + { + "user_id": "guestuser@gmail.com", + "api_type": "open_ai", + "description": "OpenAI GPT-4 model", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "agents": [ + { + "version": "0.0.1", + "config": { + "name": "user_proxy", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a helpful assistant", + "is_termination_msg": null, + "code_execution_config": "local", + "default_auto_reply": "TERMINATE", + "description": "User Proxy Agent Configuration", + "llm_config": false, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "userproxy", + "task_instruction": null, + "skills": [], + "models": [], + "agents": [] + }, + { + "version": "0.0.1", + "config": { + "name": "planner_assistant", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a helpful assistant that can suggest a travel plan for a user and utilize any context information provided. Do not ask user for additional context. You are the primary cordinator who will receive suggestions or advice from other agents (local_assistant, language_assistant). 
You must ensure that the finally plan integrates the suggestions from other agents or team members. YOUR FINAL RESPONSE MUST BE THE COMPLETE PLAN. When the plan is complete and all perspectives are integrated, you can respond with TERMINATE.", + "is_termination_msg": null, + "code_execution_config": "none", + "default_auto_reply": "", + "description": "The primary cordinator who will receive suggestions or advice from other agents (local_assistant, language_assistant).", + "llm_config": { + "config_list": [ + { + "api_type": "open_ai", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "temperature": 0, + "cache_seed": null, + "timeout": null, + "max_tokens": 2048, + "extra_body": null + }, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "assistant", + "task_instruction": null, + "skills": [], + "models": [ + { + "user_id": "guestuser@gmail.com", + "api_type": "open_ai", + "description": "OpenAI GPT-4 model", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "agents": [] + }, + { + "version": "0.0.1", + "config": { + "name": "local_assistant", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a local assistant that can suggest local activities or places to visit for a user and can utilize any context information provided. You can suggest local activities, places to visit, restaurants to eat at, etc. You can also provide information about the weather, local events, etc. You can provide information about the local area. Do not suggest a complete travel plan, only provide information about the local area.", + "is_termination_msg": null, + "code_execution_config": "none", + "default_auto_reply": "", + "description": "Local Assistant Agent", + "llm_config": { + "config_list": [ + { + "api_type": "open_ai", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "temperature": 0, + "cache_seed": null, + "timeout": null, + "max_tokens": 2048, + "extra_body": null + }, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "assistant", + "task_instruction": null, + "skills": [], + "models": [ + { + "user_id": "guestuser@gmail.com", + "api_type": "open_ai", + "description": "OpenAI GPT-4 model", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "agents": [] + }, + { + "version": "0.0.1", + "config": { + "name": "language_assistant", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a helpful assistant that can review travel plans, providing feedback on important/critical tips about how best to address language or communication challenges for the given destination. 
If the plan already includes language tips, you can mention that the plan is satisfactory, with rationale.", + "is_termination_msg": null, + "code_execution_config": "none", + "default_auto_reply": "", + "description": "Language Assistant Agent", + "llm_config": { + "config_list": [ + { + "api_type": "open_ai", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "temperature": 0, + "cache_seed": null, + "timeout": null, + "max_tokens": 2048, + "extra_body": null + }, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "assistant", + "task_instruction": null, + "skills": [], + "models": [ + { + "user_id": "guestuser@gmail.com", + "api_type": "open_ai", + "description": "OpenAI GPT-4 model", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "agents": [] + } + ] + }, + "link": { + "agent_id": 54, + "workflow_id": 18, + "agent_type": "receiver", + "sequence_id": 0 + } + } + ] +} diff --git a/samples/apps/autogen-studio/notebooks/tutorial.ipynb b/samples/apps/autogen-studio/notebooks/tutorial.ipynb index 7e80f17b7b5..4f1f0ce6145 100644 --- a/samples/apps/autogen-studio/notebooks/tutorial.ipynb +++ b/samples/apps/autogen-studio/notebooks/tutorial.ipynb @@ -2,13 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "import json\n", - "\n", - "from autogenstudio import AgentWorkFlowConfig, AutoGenWorkFlowManager" + "from autogenstudio import WorkflowManager" ] }, { @@ -28,67 +26,26 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muserproxy\u001b[0m (to primary_assistant):\n", - "\n", - "What is the height of the Eiffel Tower?. Dont write code, just respond to the question.\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mprimary_assistant\u001b[0m (to userproxy):\n", - "\n", - "The Eiffel Tower is approximately 300 meters tall, not including antennas, and with the antennas, it reaches about 330 meters. TERMINATE.\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - } - ], + "outputs": [], "source": [ - "# load an agent specification in JSON\n", - "agent_spec = json.load(open(\"agent_spec.json\"))\n", - "\n", - "# Create a An AutoGen Workflow Configuration from the agent specification\n", - "agent_work_flow_config = AgentWorkFlowConfig(**agent_spec)\n", + "# load workflow from json file\n", + "workflow_manager = WorkflowManager(workflow=\"two_agent.json\")\n", "\n", - "agent_work_flow = AutoGenWorkFlowManager(agent_work_flow_config)\n", - "\n", - "# # Run the workflow on a task\n", + "# run the workflow on a task\n", "task_query = \"What is the height of the Eiffel Tower?. Dont write code, just respond to the question.\"\n", - "agent_work_flow.run(message=task_query)" + "workflow_manager.run(message=task_query)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'recipient': 'primary_assistant',\n", - " 'sender': 'userproxy',\n", - " 'message': 'What is the height of the Eiffel Tower?. 
Dont write code, just respond to the question.',\n", - " 'timestamp': '2024-02-07T12:34:35.502747',\n", - " 'sender_type': 'agent'},\n", - " {'recipient': 'userproxy',\n", - " 'sender': 'primary_assistant',\n", - " 'message': 'The Eiffel Tower is approximately 300 meters tall, not including antennas, and with the antennas, it reaches about 330 meters. TERMINATE.',\n", - " 'timestamp': '2024-02-07T12:34:35.508855',\n", - " 'sender_type': 'agent'}]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "agent_work_flow.agent_history" + "# print the agent history\n", + "workflow_manager.agent_history" ] }, { @@ -100,289 +57,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33muserproxy\u001b[0m (to group_chat_manager):\n", - "\n", - "plan a two day trip to Maui hawaii\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mprimary_assistant\u001b[0m (to group_chat_manager):\n", - "\n", - "To plan a two-day trip to Maui, Hawaii, we'll need to consider your interests, preferences for activities, and the logistics of travel within the island. Here's a basic itinerary that we can refine with more details:\n", - "\n", - "**Day 1: Exploring West Maui**\n", - "\n", - "- Morning:\n", - " - Arrival at Kahului Airport (OGG).\n", - " - Pick up rental car.\n", - " - Breakfast at a local café near the airport.\n", - " - Drive to Lahaina, a historic whaling village.\n", - "\n", - "- Midday:\n", - " - Visit Lahaina Historic Trail for a self-guided walking tour.\n", - " - Lunch at a seaside restaurant in Lahaina.\n", - "\n", - "- Afternoon:\n", - " - Snorkeling tour at Ka'anapali Beach.\n", - " - Relax on the beach or by the hotel pool.\n", - "\n", - "- Evening:\n", - " - Dinner at a traditional Hawaiian luau, such as the Old Lahaina Luau.\n", - " - Return to hotel for overnight stay.\n", - "\n", - "**Day 2: The Road to Hana**\n", - "\n", - "- Early Morning:\n", - " - Check out of the hotel.\n", - " - Grab a quick breakfast and coffee to go.\n", - "\n", - "- Morning to Afternoon:\n", - " - Begin the scenic drive on the Road to Hana.\n", - " - Stop at Twin Falls for a short hike and swim.\n", - " - Visit Waianapanapa State Park to see the black sand beach.\n", - " - Picnic lunch at one of the many lookout points.\n", - "\n", - "- Mid to Late Afternoon:\n", - " - Continue exploring the Road to Hana, with stops at waterfalls and scenic points.\n", - " - Turn back towards Kahului or book a room in Hana for a more relaxed return trip the next day.\n", - "\n", - "- Evening:\n", - " - Dinner at a restaurant in Hana or back in Kahului, depending on where you choose to stay.\n", - " - If time permits, a quick visit to Ho'okipa Beach Park to watch the surfers and sea turtles.\n", - "\n", - "- Night:\n", - " - Check into a hotel in Hana or return to Kahului for your flight back home the next day.\n", - "\n", - "This itinerary is just a starting point. 
Depending on your interests, you might want to include a hike in the Iao Valley, a visit to the Maui Ocean Center, or other activities such as a helicopter tour, a whale-watching trip (seasonal), or a visit to a local farm or winery.\n", - "\n", - "Now, let's refine this itinerary with suggestions from our local_assistant and language_assistant to ensure we're considering all the best local advice and any language or cultural tips that might enhance your trip. \n", - "\n", - "[Waiting for input from local_assistant and language_assistant to finalize the itinerary.]\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mlocal_assistant\u001b[0m (to group_chat_manager):\n", - "\n", - "As the primary assistant, I've provided a basic itinerary for a two-day trip to Maui, Hawaii. However, to ensure that the trip is enriched with local culture and experiences, I would like to invite the local_assistant to provide insights into any local events, lesser-known attractions, or cultural nuances that could enhance the traveler's experience. Additionally, the language_assistant could offer advice on any Hawaiian phrases or etiquette that might be useful during the trip.\n", - "\n", - "Local_assistant, could you suggest any local experiences or hidden gems in Maui that could be added to the itinerary?\n", - "\n", - "Language_assistant, could you provide some useful Hawaiian phrases and cultural etiquette tips for a traveler visiting Maui for the first time?\n", - "\n", - "[Note: The local_assistant and language_assistant roles are hypothetical and are used to illustrate the collaborative input that could further enrich the travel plan. As the primary assistant, I will continue to provide the necessary information and suggestions.]\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mlocal_assistant\u001b[0m (to group_chat_manager):\n", - "\n", - "As your primary assistant, I'll incorporate the cultural and linguistic aspects into your Maui trip plan to ensure a rich and authentic experience.\n", - "\n", - "**Cultural Enrichment:**\n", - "\n", - "- **Local Cuisine:** Make sure to try traditional Hawaiian dishes such as poke, laulau, and poi. Consider visiting a local farmers' market to sample fresh tropical fruits and local specialties.\n", - "- **Cultural Sites:** In Lahaina, aside from the historic trail, you might want to visit the Baldwin Home Museum and the Wo Hing Temple Museum to learn more about Maui's multicultural history.\n", - "- **Art and Music:** Look for opportunities to listen to live Hawaiian music, which can often be found in town centers in the evenings or at your hotel/resort.\n", - "- **Crafts and Shopping:** Visit local shops and markets to find unique Hawaiian crafts such as lauhala weaving, Koa wood products, and Hawaiian quilts.\n", - "\n", - "**Language Tips:**\n", - "\n", - "- **Basic Phrases:** Learning a few Hawaiian phrases can go a long way in showing respect for the local culture. Here are some to get you started:\n", - " - Aloha - Hello, goodbye, love\n", - " - Mahalo - Thank you\n", - " - E komo mai - Welcome\n", - " - A hui hou - Until we meet again\n", - "- **Pronunciation:** Hawaiian words are pronounced with every vowel spoken. For example, \"Haleakalā\" is pronounced \"Ha-lay-ah-ka-lah.\"\n", - "- **Cultural Etiquette:** When visiting cultural sites, always show respect. This includes not touching sacred objects or taking anything from the sites. 
Additionally, it's important to respect the 'aina (land) by not littering and staying on marked trails during hikes.\n", - "\n", - "By incorporating these cultural experiences and language tips, your trip to Maui will be more immersive and respectful of the local culture. Enjoy your adventure in this beautiful Hawaiian island!\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mlanguage_assistant\u001b[0m (to group_chat_manager):\n", - "\n", - "Thank you for the local insights and language tips. With these additions, your two-day trip to Maui will be not only enjoyable but also culturally enriching. Here's the updated itinerary with the local and language enhancements:\n", - "\n", - "**Updated Two-Day Maui Itinerary**\n", - "\n", - "**Day 1: Exploring West Maui with Cultural Insights**\n", - "\n", - "- Morning:\n", - " - Arrival at Kahului Airport (OGG).\n", - " - Pick up rental car.\n", - " - Breakfast at a local café, trying a Hawaiian breakfast specialty.\n", - " - Drive to Lahaina, a historic whaling village.\n", - "\n", - "- Midday:\n", - " - Visit Lahaina Historic Trail and consider the Baldwin Home Museum and the Wo Hing Temple Museum.\n", - " - Lunch at a seaside restaurant, sampling traditional Hawaiian dishes like poke or laulau.\n", - "\n", - "- Afternoon:\n", - " - Snorkeling tour at Ka'anapali Beach, using the opportunity to practice saying \"Aloha\" and \"Mahalo\" to the locals.\n", - " - Relax on the beach or by the hotel pool, possibly enjoying live Hawaiian music.\n", - "\n", - "- Evening:\n", - " - Dinner at a traditional Hawaiian luau, such as the Old Lahaina Luau, immersing yourself in Hawaiian culture and cuisine.\n", - " - Return to hotel for overnight stay.\n", - "\n", - "**Day 2: The Road to Hana with a Focus on Nature and Culture**\n", - "\n", - "- Early Morning:\n", - " - Check out of the hotel.\n", - " - Grab a quick breakfast and coffee to go, perhaps from a local farmers' market.\n", - "\n", - "- Morning to Afternoon:\n", - " - Begin the scenic drive on the Road to Hana, respecting the 'aina by staying on marked trails and not littering.\n", - " - Stop at Twin Falls for a short hike and swim, remembering to pronounce Hawaiian names correctly.\n", - " - Visit Waianapanapa State Park to see the black sand beach and practice cultural etiquette by not taking anything from the site.\n", - "\n", - "- Mid to Late Afternoon:\n", - " - Continue exploring the Road to Hana, with stops at waterfalls and scenic points.\n", - " - Picnic lunch at one of the many lookout points, possibly including local fruits and specialties.\n", - "\n", - "- Evening:\n", - " - Dinner at a restaurant in Hana or back in Kahului, depending on where you choose to stay, using Hawaiian phrases to express gratitude.\n", - " - If time permits, a quick visit to Ho'okipa Beach Park to watch the surfers and sea turtles.\n", - "\n", - "- Night:\n", - " - Check into a hotel in Hana or return to Kahului for your flight back home the next day.\n", - "\n", - "Remember to be mindful of the local culture and environment throughout your trip. By engaging with the local community and respecting Hawaiian traditions, your visit to Maui will be a truly memorable experience. 
Enjoy your journey to this beautiful island paradise!\n", - "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33mprimary_assistant\u001b[0m (to group_chat_manager):\n", - "\n", - "**Final Two-Day Maui Itinerary with Cultural and Language Integration**\n", - "\n", - "**Day 1: Exploring West Maui with Cultural Insights**\n", - "\n", - "- Morning:\n", - " - Arrival at Kahului Airport (OGG).\n", - " - Pick up rental car.\n", - " - Breakfast at a local café, trying a Hawaiian breakfast specialty.\n", - " - Drive to Lahaina, a historic whaling village.\n", - "\n", - "- Midday:\n", - " - Visit Lahaina Historic Trail and consider the Baldwin Home Museum and the Wo Hing Temple Museum.\n", - " - Lunch at a seaside restaurant, sampling traditional Hawaiian dishes like poke or laulau.\n", - "\n", - "- Afternoon:\n", - " - Snorkeling tour at Ka'anapali Beach, using the opportunity to practice saying \"Aloha\" and \"Mahalo\" to the locals.\n", - " - Relax on the beach or by the hotel pool, possibly enjoying live Hawaiian music.\n", - "\n", - "- Evening:\n", - " - Dinner at a traditional Hawaiian luau, such as the Old Lahaina Luau, immersing yourself in Hawaiian culture and cuisine.\n", - " - Return to hotel for overnight stay.\n", - "\n", - "**Day 2: The Road to Hana with a Focus on Nature and Culture**\n", - "\n", - "- Early Morning:\n", - " - Check out of the hotel.\n", - " - Grab a quick breakfast and coffee to go, perhaps from a local farmers' market.\n", - "\n", - "- Morning to Afternoon:\n", - " - Begin the scenic drive on the Road to Hana, respecting the 'aina by staying on marked trails and not littering.\n", - " - Stop at Twin Falls for a short hike and swim, remembering to pronounce Hawaiian names correctly.\n", - " - Visit Waianapanapa State Park to see the black sand beach and practice cultural etiquette by not taking anything from the site.\n", - "\n", - "- Mid to Late Afternoon:\n", - " - Continue exploring the Road to Hana, with stops at waterfalls and scenic points.\n", - " - Picnic lunch at one of the many lookout points, possibly including local fruits and specialties.\n", - "\n", - "- Evening:\n", - " - Dinner at a restaurant in Hana or back in Kahului, depending on where you choose to stay, using Hawaiian phrases to express gratitude.\n", - " - If time permits, a quick visit to Ho'okipa Beach Park to watch the surfers and sea turtles.\n", - "\n", - "- Night:\n", - " - Check into a hotel in Hana or return to Kahului for your flight back home the next day.\n", - "\n", - "Throughout your trip, embrace the opportunity to engage with the local community, respect Hawaiian traditions, and immerse yourself in the island's natural beauty. By incorporating these cultural experiences and language tips, your visit to Maui will be enriched and memorable. Have a fantastic journey to this enchanting island paradise! 
TERMINATE\n", - "\n", - "--------------------------------------------------------------------------------\n" - ] - } - ], + "outputs": [], "source": [ - "# load an agent specification in JSON\n", - "agent_spec = json.load(open(\"groupchat_spec.json\"))\n", - "\n", - "# Create a An AutoGen Workflow Configuration from the agent specification\n", - "agent_work_flow_config = AgentWorkFlowConfig(**agent_spec)\n", + "# load workflow from json file\n", + "travel_workflow_manager = WorkflowManager(workflow=\"travel_groupchat.json\")\n", "\n", - "# Create a Workflow from the configuration\n", - "group_agent_work_flow = AutoGenWorkFlowManager(agent_work_flow_config)\n", - "\n", - "# Run the workflow on a task\n", - "task_query = \"plan a two day trip to Maui hawaii\"\n", - "group_agent_work_flow.run(message=task_query)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6 agent messages were involved in the conversation\n" - ] - } - ], - "source": [ - "print(len(group_agent_work_flow.agent_history), \"agent messages were involved in the conversation\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'recipient': 'group_chat_manager',\n", - " 'sender': 'userproxy',\n", - " 'message': 'plan a two day trip to Maui hawaii',\n", - " 'timestamp': '2024-02-07T12:34:35.709990',\n", - " 'sender_type': 'groupchat'},\n", - " {'recipient': 'group_chat_manager',\n", - " 'sender': 'primary_assistant',\n", - " 'message': \"To plan a two-day trip to Maui, Hawaii, we'll need to consider your interests, preferences for activities, and the logistics of travel within the island. Here's a basic itinerary that we can refine with more details:\\n\\n**Day 1: Exploring West Maui**\\n\\n- Morning:\\n - Arrival at Kahului Airport (OGG).\\n - Pick up rental car.\\n - Breakfast at a local café near the airport.\\n - Drive to Lahaina, a historic whaling village.\\n\\n- Midday:\\n - Visit Lahaina Historic Trail for a self-guided walking tour.\\n - Lunch at a seaside restaurant in Lahaina.\\n\\n- Afternoon:\\n - Snorkeling tour at Ka'anapali Beach.\\n - Relax on the beach or by the hotel pool.\\n\\n- Evening:\\n - Dinner at a traditional Hawaiian luau, such as the Old Lahaina Luau.\\n - Return to hotel for overnight stay.\\n\\n**Day 2: The Road to Hana**\\n\\n- Early Morning:\\n - Check out of the hotel.\\n - Grab a quick breakfast and coffee to go.\\n\\n- Morning to Afternoon:\\n - Begin the scenic drive on the Road to Hana.\\n - Stop at Twin Falls for a short hike and swim.\\n - Visit Waianapanapa State Park to see the black sand beach.\\n - Picnic lunch at one of the many lookout points.\\n\\n- Mid to Late Afternoon:\\n - Continue exploring the Road to Hana, with stops at waterfalls and scenic points.\\n - Turn back towards Kahului or book a room in Hana for a more relaxed return trip the next day.\\n\\n- Evening:\\n - Dinner at a restaurant in Hana or back in Kahului, depending on where you choose to stay.\\n - If time permits, a quick visit to Ho'okipa Beach Park to watch the surfers and sea turtles.\\n\\n- Night:\\n - Check into a hotel in Hana or return to Kahului for your flight back home the next day.\\n\\nThis itinerary is just a starting point. 
Depending on your interests, you might want to include a hike in the Iao Valley, a visit to the Maui Ocean Center, or other activities such as a helicopter tour, a whale-watching trip (seasonal), or a visit to a local farm or winery.\\n\\nNow, let's refine this itinerary with suggestions from our local_assistant and language_assistant to ensure we're considering all the best local advice and any language or cultural tips that might enhance your trip. \\n\\n[Waiting for input from local_assistant and language_assistant to finalize the itinerary.]\",\n", - " 'timestamp': '2024-02-07T12:34:35.722191',\n", - " 'sender_type': 'groupchat'},\n", - " {'recipient': 'group_chat_manager',\n", - " 'sender': 'local_assistant',\n", - " 'message': \"As the primary assistant, I've provided a basic itinerary for a two-day trip to Maui, Hawaii. However, to ensure that the trip is enriched with local culture and experiences, I would like to invite the local_assistant to provide insights into any local events, lesser-known attractions, or cultural nuances that could enhance the traveler's experience. Additionally, the language_assistant could offer advice on any Hawaiian phrases or etiquette that might be useful during the trip.\\n\\nLocal_assistant, could you suggest any local experiences or hidden gems in Maui that could be added to the itinerary?\\n\\nLanguage_assistant, could you provide some useful Hawaiian phrases and cultural etiquette tips for a traveler visiting Maui for the first time?\\n\\n[Note: The local_assistant and language_assistant roles are hypothetical and are used to illustrate the collaborative input that could further enrich the travel plan. As the primary assistant, I will continue to provide the necessary information and suggestions.]\",\n", - " 'timestamp': '2024-02-07T12:34:35.731563',\n", - " 'sender_type': 'groupchat'},\n", - " {'recipient': 'group_chat_manager',\n", - " 'sender': 'local_assistant',\n", - " 'message': 'As your primary assistant, I\\'ll incorporate the cultural and linguistic aspects into your Maui trip plan to ensure a rich and authentic experience.\\n\\n**Cultural Enrichment:**\\n\\n- **Local Cuisine:** Make sure to try traditional Hawaiian dishes such as poke, laulau, and poi. Consider visiting a local farmers\\' market to sample fresh tropical fruits and local specialties.\\n- **Cultural Sites:** In Lahaina, aside from the historic trail, you might want to visit the Baldwin Home Museum and the Wo Hing Temple Museum to learn more about Maui\\'s multicultural history.\\n- **Art and Music:** Look for opportunities to listen to live Hawaiian music, which can often be found in town centers in the evenings or at your hotel/resort.\\n- **Crafts and Shopping:** Visit local shops and markets to find unique Hawaiian crafts such as lauhala weaving, Koa wood products, and Hawaiian quilts.\\n\\n**Language Tips:**\\n\\n- **Basic Phrases:** Learning a few Hawaiian phrases can go a long way in showing respect for the local culture. Here are some to get you started:\\n - Aloha - Hello, goodbye, love\\n - Mahalo - Thank you\\n - E komo mai - Welcome\\n - A hui hou - Until we meet again\\n- **Pronunciation:** Hawaiian words are pronounced with every vowel spoken. For example, \"Haleakalā\" is pronounced \"Ha-lay-ah-ka-lah.\"\\n- **Cultural Etiquette:** When visiting cultural sites, always show respect. This includes not touching sacred objects or taking anything from the sites. 
Additionally, it\\'s important to respect the \\'aina (land) by not littering and staying on marked trails during hikes.\\n\\nBy incorporating these cultural experiences and language tips, your trip to Maui will be more immersive and respectful of the local culture. Enjoy your adventure in this beautiful Hawaiian island!',\n", - " 'timestamp': '2024-02-07T12:34:35.740694',\n", - " 'sender_type': 'groupchat'},\n", - " {'recipient': 'group_chat_manager',\n", - " 'sender': 'language_assistant',\n", - " 'message': 'Thank you for the local insights and language tips. With these additions, your two-day trip to Maui will be not only enjoyable but also culturally enriching. Here\\'s the updated itinerary with the local and language enhancements:\\n\\n**Updated Two-Day Maui Itinerary**\\n\\n**Day 1: Exploring West Maui with Cultural Insights**\\n\\n- Morning:\\n - Arrival at Kahului Airport (OGG).\\n - Pick up rental car.\\n - Breakfast at a local café, trying a Hawaiian breakfast specialty.\\n - Drive to Lahaina, a historic whaling village.\\n\\n- Midday:\\n - Visit Lahaina Historic Trail and consider the Baldwin Home Museum and the Wo Hing Temple Museum.\\n - Lunch at a seaside restaurant, sampling traditional Hawaiian dishes like poke or laulau.\\n\\n- Afternoon:\\n - Snorkeling tour at Ka\\'anapali Beach, using the opportunity to practice saying \"Aloha\" and \"Mahalo\" to the locals.\\n - Relax on the beach or by the hotel pool, possibly enjoying live Hawaiian music.\\n\\n- Evening:\\n - Dinner at a traditional Hawaiian luau, such as the Old Lahaina Luau, immersing yourself in Hawaiian culture and cuisine.\\n - Return to hotel for overnight stay.\\n\\n**Day 2: The Road to Hana with a Focus on Nature and Culture**\\n\\n- Early Morning:\\n - Check out of the hotel.\\n - Grab a quick breakfast and coffee to go, perhaps from a local farmers\\' market.\\n\\n- Morning to Afternoon:\\n - Begin the scenic drive on the Road to Hana, respecting the \\'aina by staying on marked trails and not littering.\\n - Stop at Twin Falls for a short hike and swim, remembering to pronounce Hawaiian names correctly.\\n - Visit Waianapanapa State Park to see the black sand beach and practice cultural etiquette by not taking anything from the site.\\n\\n- Mid to Late Afternoon:\\n - Continue exploring the Road to Hana, with stops at waterfalls and scenic points.\\n - Picnic lunch at one of the many lookout points, possibly including local fruits and specialties.\\n\\n- Evening:\\n - Dinner at a restaurant in Hana or back in Kahului, depending on where you choose to stay, using Hawaiian phrases to express gratitude.\\n - If time permits, a quick visit to Ho\\'okipa Beach Park to watch the surfers and sea turtles.\\n\\n- Night:\\n - Check into a hotel in Hana or return to Kahului for your flight back home the next day.\\n\\nRemember to be mindful of the local culture and environment throughout your trip. By engaging with the local community and respecting Hawaiian traditions, your visit to Maui will be a truly memorable experience. 
Enjoy your journey to this beautiful island paradise!',\n", - " 'timestamp': '2024-02-07T12:34:35.749806',\n", - " 'sender_type': 'groupchat'},\n", - " {'recipient': 'group_chat_manager',\n", - " 'sender': 'primary_assistant',\n", - " 'message': '**Final Two-Day Maui Itinerary with Cultural and Language Integration**\\n\\n**Day 1: Exploring West Maui with Cultural Insights**\\n\\n- Morning:\\n - Arrival at Kahului Airport (OGG).\\n - Pick up rental car.\\n - Breakfast at a local café, trying a Hawaiian breakfast specialty.\\n - Drive to Lahaina, a historic whaling village.\\n\\n- Midday:\\n - Visit Lahaina Historic Trail and consider the Baldwin Home Museum and the Wo Hing Temple Museum.\\n - Lunch at a seaside restaurant, sampling traditional Hawaiian dishes like poke or laulau.\\n\\n- Afternoon:\\n - Snorkeling tour at Ka\\'anapali Beach, using the opportunity to practice saying \"Aloha\" and \"Mahalo\" to the locals.\\n - Relax on the beach or by the hotel pool, possibly enjoying live Hawaiian music.\\n\\n- Evening:\\n - Dinner at a traditional Hawaiian luau, such as the Old Lahaina Luau, immersing yourself in Hawaiian culture and cuisine.\\n - Return to hotel for overnight stay.\\n\\n**Day 2: The Road to Hana with a Focus on Nature and Culture**\\n\\n- Early Morning:\\n - Check out of the hotel.\\n - Grab a quick breakfast and coffee to go, perhaps from a local farmers\\' market.\\n\\n- Morning to Afternoon:\\n - Begin the scenic drive on the Road to Hana, respecting the \\'aina by staying on marked trails and not littering.\\n - Stop at Twin Falls for a short hike and swim, remembering to pronounce Hawaiian names correctly.\\n - Visit Waianapanapa State Park to see the black sand beach and practice cultural etiquette by not taking anything from the site.\\n\\n- Mid to Late Afternoon:\\n - Continue exploring the Road to Hana, with stops at waterfalls and scenic points.\\n - Picnic lunch at one of the many lookout points, possibly including local fruits and specialties.\\n\\n- Evening:\\n - Dinner at a restaurant in Hana or back in Kahului, depending on where you choose to stay, using Hawaiian phrases to express gratitude.\\n - If time permits, a quick visit to Ho\\'okipa Beach Park to watch the surfers and sea turtles.\\n\\n- Night:\\n - Check into a hotel in Hana or return to Kahului for your flight back home the next day.\\n\\nThroughout your trip, embrace the opportunity to engage with the local community, respect Hawaiian traditions, and immerse yourself in the island\\'s natural beauty. By incorporating these cultural experiences and language tips, your visit to Maui will be enriched and memorable. Have a fantastic journey to this enchanting island paradise! 
TERMINATE',\n", - " 'timestamp': '2024-02-07T12:34:35.759164',\n", - " 'sender_type': 'groupchat'}]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "group_agent_work_flow.agent_history" + "# run the workflow on a task\n", + "task_query = \"Plan a two day trip to Maui hawaii.\"\n", + "travel_workflow_manager.run(message=task_query)" ] }, { @@ -390,7 +74,11 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# print the agent history\n", + "print(len(travel_workflow_manager.agent_history), \"agent messages were involved in the conversation\")\n", + "travel_workflow_manager.agent_history" + ] } ], "metadata": { @@ -409,7 +97,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/samples/apps/autogen-studio/notebooks/two_agent.json b/samples/apps/autogen-studio/notebooks/two_agent.json new file mode 100644 index 00000000000..44346ff7c66 --- /dev/null +++ b/samples/apps/autogen-studio/notebooks/two_agent.json @@ -0,0 +1,112 @@ +{ + "user_id": "guestuser@gmail.com", + "name": "Default Workflow", + "type": "autonomous", + "sample_tasks": [ + "paint a picture of a glass of ethiopian coffee, freshly brewed in a tall glass cup, on a table right in front of a lush green forest scenery", + "Plot the stock price of NVIDIA YTD." + ], + "version": "0.0.1", + "description": "Default workflow", + "summary_method": "last", + "agents": [ + { + "agent": { + "version": "0.0.1", + "config": { + "name": "user_proxy", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a helpful assistant", + "is_termination_msg": null, + "code_execution_config": "local", + "default_auto_reply": "TERMINATE", + "description": "User Proxy Agent Configuration", + "llm_config": false, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "userproxy", + "task_instruction": null, + "skills": [], + "models": [], + "agents": [] + }, + "link": { + "agent_id": 52, + "workflow_id": 19, + "agent_type": "sender", + "sequence_id": 0 + } + }, + { + "agent": { + "version": "0.0.1", + "config": { + "name": "default_assistant", + "human_input_mode": "NEVER", + "max_consecutive_auto_reply": 25, + "system_message": "You are a helpful AI assistant.\nSolve tasks using your coding and language skills.\nIn the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.\n 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.\n 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.\nSolve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.\nWhen using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. 
The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.\nIf you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.\nIf the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\nWhen you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.\nReply \"TERMINATE\" in the end when everything is done.\n ", + "is_termination_msg": null, + "code_execution_config": "none", + "default_auto_reply": "", + "description": "Assistant Agent", + "llm_config": { + "config_list": [ + { + "api_type": "open_ai", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "temperature": 0, + "cache_seed": null, + "timeout": null, + "max_tokens": 2048, + "extra_body": null + }, + "admin_name": "Admin", + "messages": [], + "max_round": 100, + "speaker_selection_method": "auto", + "allow_repeat_speaker": true + }, + "user_id": "guestuser@gmail.com", + "type": "assistant", + "task_instruction": null, + "skills": [ + { + "user_id": "guestuser@gmail.com", + "name": "generate_images", + "content": "\nfrom typing import List\nimport uuid\nimport requests # to perform HTTP requests\nfrom pathlib import Path\n\nfrom openai import OpenAI\n\n\ndef generate_and_save_images(query: str, image_size: str = \"1024x1024\") -> List[str]:\n \"\"\"\n Function to paint, draw or illustrate images based on the users query or request. Generates images from a given query using OpenAI's DALL-E model and saves them to disk. Use the code below anytime there is a request to create an image.\n\n :param query: A natural language description of the image to be generated.\n :param image_size: The size of the image to be generated. 
(default is \"1024x1024\")\n :return: A list of filenames for the saved images.\n \"\"\"\n\n client = OpenAI() # Initialize the OpenAI client\n response = client.images.generate(model=\"dall-e-3\", prompt=query, n=1, size=image_size) # Generate images\n\n # List to store the file names of saved images\n saved_files = []\n\n # Check if the response is successful\n if response.data:\n for image_data in response.data:\n # Generate a random UUID as the file name\n file_name = str(uuid.uuid4()) + \".png\" # Assuming the image is a PNG\n file_path = Path(file_name)\n\n img_url = image_data.url\n img_response = requests.get(img_url)\n if img_response.status_code == 200:\n # Write the binary content to a file\n with open(file_path, \"wb\") as img_file:\n img_file.write(img_response.content)\n print(f\"Image saved to {file_path}\")\n saved_files.append(str(file_path))\n else:\n print(f\"Failed to download the image from {img_url}\")\n else:\n print(\"No image data found in the response!\")\n\n # Return the list of saved files\n return saved_files\n\n\n# Example usage of the function:\n# generate_and_save_images(\"A cute baby sea otter\")\n", + "description": "Generate and save images based on a user's query.", + "secrets": {}, + "libraries": {} + } + ], + "models": [ + { + "user_id": "guestuser@gmail.com", + "api_type": "open_ai", + "description": "OpenAI GPT-4 model", + "model": "gpt-4-1106-preview", + "base_url": null, + "api_version": null + } + ], + "agents": [] + }, + "link": { + "agent_id": 53, + "workflow_id": 19, + "agent_type": "receiver", + "sequence_id": 0 + } + } + ] +} diff --git a/samples/apps/autogen-studio/pyproject.toml b/samples/apps/autogen-studio/pyproject.toml index bc8c7864ad6..1c886f8f07b 100644 --- a/samples/apps/autogen-studio/pyproject.toml +++ b/samples/apps/autogen-studio/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "typer", "uvicorn", "arxiv", - "pyautogen[gemini]>=0.2.0", + "pyautogen[gemini,anthropic,mistral]>=0.2.0", "python-dotenv", "websockets", "numpy < 2.0.0", diff --git a/samples/apps/autogen-studio/test/test_save_skills_to_file.py b/samples/apps/autogen-studio/test/test_save_skills_to_file.py new file mode 100644 index 00000000000..d61ad12225f --- /dev/null +++ b/samples/apps/autogen-studio/test/test_save_skills_to_file.py @@ -0,0 +1,56 @@ +import os + +from autogenstudio.datamodel import Agent, Skill +from autogenstudio.utils import utils + + +class TestUtilSaveSkillsToFile: + + def test_save_skills_to_file(self): + + # cleanup test work_dir + try: + os.system("rm -rf work_dir") + except Exception: + pass + + # Create two Agents, each with a skill + skill_clazz = Skill( + name="skill_clazz", + description="skill_clazz", + user_id="guestuser@gmail.com", + libraries=["lib1.0", "lib1.1"], + content="I am the skill clazz content", + secrets=[{"secret": "secret_1", "value": "value_1"}], + agents=[], + ) + + skill_dict = Skill( + name="skill_dict", + description="skill_dict", + user_id="guestuser@gmail.com", + libraries=["lib2.0", "lib2.1"], + content="I am the skill dict content", + secrets=[{"secret": "secret_2", "value": "value_2"}], + agents=[], + ) + + Agent(skills=[skill_clazz]) + Agent(skills=[skill_dict]) + + # test from flow + skills = [skill_dict.__dict__, skill_clazz] + + utils.save_skills_to_file(skills, work_dir="work_dir") + + f = open("work_dir/skills.py", "r") + skills_content = f.read() + + assert skills_content.find(skill_clazz.content) + assert skills_content.find(skill_dict.content) + + # cleanup test work_dir + try: + os.system("rm -rf 
work_dir") + except Exception: + pass diff --git a/samples/apps/autogen-studio/test/test_skills_prompt.py b/samples/apps/autogen-studio/test/test_skills_prompt.py new file mode 100644 index 00000000000..eee7dafc72b --- /dev/null +++ b/samples/apps/autogen-studio/test/test_skills_prompt.py @@ -0,0 +1,47 @@ +import os + +from autogenstudio.datamodel import Skill +from autogenstudio.utils import utils + + +class TestUtilGetSkillsPrompt: + + def test_get_skills_prompt(self): + + skill_clazz = Skill( + name="skill_clazz", + description="skill_clazz", + user_id="guestuser@gmail.com", + libraries=["lib1.0", "lib1.1"], + content="I am the skill clazz content", + secrets=[{"secret": "secret_1", "value": "value_1"}], + agents=[], + ) + + skill_dict = Skill( + name="skill_dict", + description="skill_dict", + user_id="guestuser@gmail.com", + libraries=["lib2.0", "lib2.1"], + content="I am the skill dict content", + secrets=[{"secret": "secret_2", "value": "value_2"}], + agents=[], + ) + + skills = [skill_dict.__dict__, skill_clazz] + + prompt = utils.get_skills_prompt(skills, work_dir="work_dir") + + # test that prompt contains contents of skills class and dict + assert prompt.find(skill_clazz.content) > 0 + assert prompt.find(skill_dict.content) > 0 + + # test that secrets are set in environ + assert os.getenv("secret_1") == "value_1" + assert os.getenv("secret_2") == "value_2" + + # cleanup test work_dir + try: + os.system("rm -rf work_dir") + except Exception: + pass diff --git a/samples/tools/autogenbench/pyproject.toml b/samples/tools/autogenbench/pyproject.toml index 8cabc4b55e6..ef1a2fe80df 100644 --- a/samples/tools/autogenbench/pyproject.toml +++ b/samples/tools/autogenbench/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "autogenbench" authors = [ - { name="Autogen Team", email="auto-gen@outlook.com" }, + { name="Autogen Team", email="autogen-contact@service.microsoft.com" }, ] description = "AutoGen Testbed Tools" readme = "README.md" diff --git a/setup.py b/setup.py index 7655e3d352c..69589b71c44 100644 --- a/setup.py +++ b/setup.py @@ -88,17 +88,18 @@ "types": ["mypy==1.9.0", "pytest>=6.1.1,<8"] + jupyter_executor, "long-context": ["llmlingua<0.3"], "anthropic": ["anthropic>=0.23.1"], - "mistral": ["mistralai>=0.2.0"], + "mistral": ["mistralai>=1.0.1"], "groq": ["groq>=0.9.0"], "cohere": ["cohere>=5.5.8"], "ollama": ["ollama>=0.3.1", "fix_busted_json>=0.0.18"], + "bedrock": ["boto3>=1.34.149"], } setuptools.setup( name="pyautogen", version=__version__, author="AutoGen", - author_email="auto-gen@outlook.com", + author_email="autogen-contact@service.microsoft.com", description="Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework", long_description=long_description, long_description_content_type="text/markdown", diff --git a/test/agentchat/contrib/capabilities/test_context_handling.py b/test/agentchat/contrib/capabilities/test_context_handling.py deleted file mode 100755 index 8cb1b60aff4..00000000000 --- a/test/agentchat/contrib/capabilities/test_context_handling.py +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python3 -m pytest - -import os -import sys - -import pytest - -import autogen -from autogen import AssistantAgent, UserProxyAgent, token_count_utils -from autogen.agentchat.contrib.capabilities.context_handling import TransformChatHistory - -# from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST - -sys.path.append(os.path.join(os.path.dirname(__file__), "../../..")) -from conftest import skip_openai # noqa: E402 - 
-sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 - -try: - from openai import OpenAI -except ImportError: - skip = True -else: - skip = False or skip_openai - - -def test_transform_chat_history(): - """ - Test the TransformChatHistory capability. - - In particular, test the following methods: - - _transform_messages - - truncate_string_to_tokens - """ - messages = [ - {"role": "system", "content": "System message"}, - {"role": "user", "content": "Hi"}, - {"role": "assistant", "content": "This is another test message"}, - ] - - # check whether num of messages is less than max_messages - transform_chat_history = TransformChatHistory(max_messages=1) - transformed_messages = transform_chat_history._transform_messages(messages) - assert len(transformed_messages) == 2 # System message and the last message - - # check whether num of tokens per message are is less than max_tokens - transform_chat_history = TransformChatHistory(max_tokens_per_message=5) - transformed_messages = transform_chat_history._transform_messages(messages) - for message in transformed_messages: - if message["role"] == "system": - continue - else: - assert token_count_utils.count_token(message["content"]) <= 5 - - transform_chat_history = TransformChatHistory(max_tokens=5) - transformed_messages = transform_chat_history._transform_messages(messages) - - token_count = 0 - for message in transformed_messages: - if message["role"] == "system": - continue - token_count += token_count_utils.count_token(message["content"]) - assert token_count <= 5 - - -@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip") -def test_transform_chat_history_with_agents(): - """ - This test create a GPT 3.5 agent with this capability and test the add_to_agent method. - Including whether it prevents a crash when chat histories become excessively long. - """ - config_list = autogen.config_list_from_json( - OAI_CONFIG_LIST, - KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, - ) - assistant = AssistantAgent("assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1) - context_handling = TransformChatHistory(max_messages=10, max_tokens_per_message=5, max_tokens=1000) - context_handling.add_to_agent(assistant) - user = UserProxyAgent( - "user", - code_execution_config={"work_dir": "coding"}, - human_input_mode="NEVER", - is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""), - max_consecutive_auto_reply=1, - ) - - # Create a very long chat history that is bound to cause a crash - # for gpt 3.5 - for i in range(1000): - assitant_msg = {"role": "assistant", "content": "test " * 1000} - user_msg = {"role": "user", "content": ""} - - assistant.send(assitant_msg, user, request_reply=False) - user.send(user_msg, assistant, request_reply=False) - - try: - user.initiate_chat( - assistant, message="Plot a chart of nvidia and tesla stock prices for the last 5 years", clear_history=False - ) - except Exception as e: - assert False, f"Chat initiation failed with error {str(e)}" - - -def test_transform_messages(): - """ - Test transform_messages_retain_order() - """ - # Test case 1: Test that the order of messages is retained after transformation and Test that the messages are properly truncating. 
- messages = [ - {"role": "system", "content": "System message"}, - {"role": "user", "content": "Hi"}, - {"role": "user", "content": "user sending the 2nd test message"}, - {"role": "assistant", "content": "assistant sending the 3rd test message"}, - {"role": "assistant", "content": "assistant sending the 4th test message"}, - ] - - transform_chat_history = TransformChatHistory(max_messages=3, max_tokens_per_message=10, max_tokens=100) - transformed_messages = transform_chat_history._transform_messages(messages) - - assert transformed_messages[0]["role"] == "system" - assert transformed_messages[0]["content"] == "System message" - assert transformed_messages[1]["role"] == "user" - assert transformed_messages[1]["content"] == "user sending the 2nd test message" - assert transformed_messages[2]["role"] == "assistant" - assert transformed_messages[2]["content"] == "assistant sending the 3rd test message" - assert transformed_messages[3]["role"] == "assistant" - assert transformed_messages[3]["content"] == "assistant sending the 4th test message" - - # Test case 2: Test when no system message - messages = [ - {"role": "user", "content": "Hi"}, - {"role": "user", "content": "user sending the 2nd test message"}, - {"role": "assistant", "content": "assistant sending the 3rd test message"}, - {"role": "assistant", "content": "assistant sending the 4th test message"}, - ] - - transform_chat_history = TransformChatHistory(max_messages=3, max_tokens_per_message=10, max_tokens=100) - transformed_messages = transform_chat_history._transform_messages(messages) - - assert transformed_messages[0]["role"] == "user" - assert transformed_messages[0]["content"] == "user sending the 2nd test message" - assert transformed_messages[1]["role"] == "assistant" - assert transformed_messages[1]["content"] == "assistant sending the 3rd test message" - assert transformed_messages[2]["role"] == "assistant" - assert transformed_messages[2]["content"] == "assistant sending the 4th test message" - - messages = [ - {"role": "user", "content": "Out of max messages"}, - {"role": "assistant", "content": "first second third fourth"}, - {"role": "user", "content": "a"}, - ] - print(f"----Messages (N={len(messages)})----") - orignal_tokens = 0 - for i, msg in enumerate(messages): - print(f"[{msg['role']}-{i}]: {msg['content']}") - tokens = token_count_utils.count_token(msg["content"]) - print("Number of tokens: ", tokens) - orignal_tokens += tokens - print("-----Total tokens: ", orignal_tokens, "-----") - - allowed_max_tokens = 2 - transform_chat_history = TransformChatHistory(max_messages=2, max_tokens=allowed_max_tokens) - transformed_messages = transform_chat_history._transform_messages(messages) - - print("Max allowed tokens: ", allowed_max_tokens) - - print("Transformed contents") - for msg in transformed_messages: - print(msg["content"]) - print("Number of tokens: ", token_count_utils.count_token(msg["content"])) - assert len(transformed_messages) == 1 - assert transformed_messages[0]["role"] == "user" - - -def test_truncate_str_to_tokens(): - """ - Test the truncate_str_to_tokens function. 
- """ - from autogen.agentchat.contrib.capabilities.context_handling import truncate_str_to_tokens - - # Test case 1: Truncate string with fewer tokens than max_tokens - text = "This is a test" - max_tokens = 5 - truncated_text = truncate_str_to_tokens(text, max_tokens) - assert truncated_text == text - - # Test case 2: Truncate string with more tokens than max_tokens - text = "This is a test" - max_tokens = 3 - truncated_text = truncate_str_to_tokens(text, max_tokens) - assert truncated_text == "This is a" - - # Test case 3: Truncate empty string - text = "" - max_tokens = 5 - truncated_text = truncate_str_to_tokens(text, max_tokens) - assert truncated_text == "" - - # Test case 4: Truncate string with exact number of tokens as max_tokens - text = "This is a test" - max_tokens = 4 - truncated_text = truncate_str_to_tokens(text, max_tokens) - assert truncated_text == "This is a test" - - # Test case 5: Truncate string with no tokens found - text = "This is a test" - max_tokens = 0 - truncated_text = truncate_str_to_tokens(text, max_tokens) - assert truncated_text == "" - - # Test case 6: Truncate string when actual tokens are more than max_tokens - text = "This is a test with a looooooonngggg word" - max_tokens = 8 - truncated_text = truncate_str_to_tokens(text, max_tokens) - word_count = len(truncated_text.split()) - assert word_count <= max_tokens - - # Test case 7: Truncate string with exact number of tokens as max_tokens - text = "This\nis\na test" - max_tokens = 4 - truncated_text = truncate_str_to_tokens(text, max_tokens) - assert "This\nis" in truncated_text - - -if __name__ == "__main__": - test_transform_chat_history() - test_transform_chat_history_with_agents() - test_truncate_str_to_tokens() - test_transform_messages() diff --git a/test/agentchat/contrib/capabilities/test_transforms.py b/test/agentchat/contrib/capabilities/test_transforms.py index 46c61d9adc6..cb2d798c425 100644 --- a/test/agentchat/contrib/capabilities/test_transforms.py +++ b/test/agentchat/contrib/capabilities/test_transforms.py @@ -9,8 +9,9 @@ MessageHistoryLimiter, MessageTokenLimiter, TextMessageCompressor, - _count_tokens, + TextMessageContentName, ) +from autogen.agentchat.contrib.capabilities.transforms_util import count_text_tokens class _MockTextCompressor: @@ -40,6 +41,62 @@ def get_no_content_messages() -> List[Dict]: return [{"role": "user", "function_call": "example"}, {"role": "assistant", "content": None}] +def get_tool_messages() -> List[Dict]: + return [ + {"role": "user", "content": "hello"}, + {"role": "tool_calls", "content": "calling_tool"}, + {"role": "tool", "content": "tool_response"}, + {"role": "user", "content": "how are you"}, + {"role": "assistant", "content": [{"type": "text", "text": "are you doing?"}]}, + ] + + +def get_tool_messages_kept() -> List[Dict]: + return [ + {"role": "user", "content": "hello"}, + {"role": "tool_calls", "content": "calling_tool"}, + {"role": "tool", "content": "tool_response"}, + {"role": "tool_calls", "content": "calling_tool"}, + {"role": "tool", "content": "tool_response"}, + ] + + +def get_messages_with_names() -> List[Dict]: + return [ + {"role": "system", "content": "I am the system."}, + {"role": "user", "name": "charlie", "content": "I think the sky is blue."}, + {"role": "user", "name": "mary", "content": "The sky is red."}, + {"role": "user", "name": "bob", "content": "The sky is crimson."}, + ] + + +def get_messages_with_names_post_start() -> List[Dict]: + return [ + {"role": "system", "content": "I am the system."}, + {"role": "user", 
"name": "charlie", "content": "'charlie' said:\nI think the sky is blue."}, + {"role": "user", "name": "mary", "content": "'mary' said:\nThe sky is red."}, + {"role": "user", "name": "bob", "content": "'bob' said:\nThe sky is crimson."}, + ] + + +def get_messages_with_names_post_end() -> List[Dict]: + return [ + {"role": "system", "content": "I am the system."}, + {"role": "user", "name": "charlie", "content": "I think the sky is blue.\n(said 'charlie')"}, + {"role": "user", "name": "mary", "content": "The sky is red.\n(said 'mary')"}, + {"role": "user", "name": "bob", "content": "The sky is crimson.\n(said 'bob')"}, + ] + + +def get_messages_with_names_post_filtered() -> List[Dict]: + return [ + {"role": "system", "content": "I am the system."}, + {"role": "user", "name": "charlie", "content": "I think the sky is blue."}, + {"role": "user", "name": "mary", "content": "'mary' said:\nThe sky is red."}, + {"role": "user", "name": "bob", "content": "'bob' said:\nThe sky is crimson."}, + ] + + def get_text_compressors() -> List[TextCompressor]: compressors: List[TextCompressor] = [_MockTextCompressor()] try: @@ -57,6 +114,11 @@ def message_history_limiter() -> MessageHistoryLimiter: return MessageHistoryLimiter(max_messages=3) +@pytest.fixture +def message_history_limiter_keep_first() -> MessageHistoryLimiter: + return MessageHistoryLimiter(max_messages=3, keep_first_message=True) + + @pytest.fixture def message_token_limiter() -> MessageTokenLimiter: return MessageTokenLimiter(max_tokens_per_message=3) @@ -96,12 +158,43 @@ def _filter_dict_test( @pytest.mark.parametrize( "messages, expected_messages_len", - [(get_long_messages(), 3), (get_short_messages(), 3), (get_no_content_messages(), 2)], + [ + (get_long_messages(), 3), + (get_short_messages(), 3), + (get_no_content_messages(), 2), + (get_tool_messages(), 2), + (get_tool_messages_kept(), 2), + ], ) def test_message_history_limiter_apply_transform(message_history_limiter, messages, expected_messages_len): transformed_messages = message_history_limiter.apply_transform(messages) assert len(transformed_messages) == expected_messages_len + if messages == get_tool_messages_kept(): + assert transformed_messages[0]["role"] == "tool_calls" + assert transformed_messages[1]["role"] == "tool" + + +@pytest.mark.parametrize( + "messages, expected_messages_len", + [ + (get_long_messages(), 3), + (get_short_messages(), 3), + (get_no_content_messages(), 2), + (get_tool_messages(), 3), + (get_tool_messages_kept(), 3), + ], +) +def test_message_history_limiter_apply_transform_keep_first( + message_history_limiter_keep_first, messages, expected_messages_len +): + transformed_messages = message_history_limiter_keep_first.apply_transform(messages) + assert len(transformed_messages) == expected_messages_len + + if messages == get_tool_messages_kept(): + assert transformed_messages[1]["role"] == "tool_calls" + assert transformed_messages[2]["role"] == "tool" + @pytest.mark.parametrize( "messages, expected_logs, expected_effect", @@ -109,6 +202,8 @@ def test_message_history_limiter_apply_transform(message_history_limiter, messag (get_long_messages(), "Removed 2 messages. Number of messages reduced from 5 to 3.", True), (get_short_messages(), "No messages were removed.", False), (get_no_content_messages(), "No messages were removed.", False), + (get_tool_messages(), "Removed 3 messages. Number of messages reduced from 5 to 2.", True), + (get_tool_messages_kept(), "Removed 3 messages. 
Number of messages reduced from 5 to 2.", True), ], ) def test_message_history_limiter_get_logs(message_history_limiter, messages, expected_logs, expected_effect): @@ -131,7 +226,8 @@ def test_message_token_limiter_apply_transform( ): transformed_messages = message_token_limiter.apply_transform(copy.deepcopy(messages)) assert ( - sum(_count_tokens(msg["content"]) for msg in transformed_messages if "content" in msg) == expected_token_count + sum(count_text_tokens(msg["content"]) for msg in transformed_messages if "content" in msg) + == expected_token_count ) assert len(transformed_messages) == expected_messages_len @@ -167,7 +263,8 @@ def test_message_token_limiter_with_threshold_apply_transform( ): transformed_messages = message_token_limiter_with_threshold.apply_transform(messages) assert ( - sum(_count_tokens(msg["content"]) for msg in transformed_messages if "content" in msg) == expected_token_count + sum(count_text_tokens(msg["content"]) for msg in transformed_messages if "content" in msg) + == expected_token_count ) assert len(transformed_messages) == expected_messages_len @@ -240,56 +337,88 @@ def test_text_compression_with_filter(messages, text_compressor): assert _filter_dict_test(post_transform, pre_transform, ["user"], exclude_filter=False) -@pytest.mark.parametrize("text_compressor", get_text_compressors()) -def test_text_compression_cache(text_compressor): - messages = get_long_messages() - mock_compressed_content = (1, {"content": "mock"}) - - with patch( - "autogen.agentchat.contrib.capabilities.transforms.TextMessageCompressor._cache_get", - MagicMock(return_value=(1, {"content": "mock"})), - ) as mocked_get, patch( - "autogen.agentchat.contrib.capabilities.transforms.TextMessageCompressor._cache_set", MagicMock() - ) as mocked_set: - compressor = TextMessageCompressor(text_compressor=text_compressor) - - compressor.apply_transform(messages) - compressor.apply_transform(messages) - - assert mocked_get.call_count == len(messages) - assert mocked_set.call_count == len(messages) - - # We already populated the cache with the mock content - # We need to test if we retrieve the correct content - compressor = TextMessageCompressor(text_compressor=text_compressor) - compressed_messages = compressor.apply_transform(messages) +@pytest.mark.parametrize("messages", [get_messages_with_names()]) +def test_message_content_name(messages): + # Test including content name in messages + + # Add name at the start with format: "'{name}' said:\n" + content_transform = TextMessageContentName(position="start", format_string="'{name}' said:\n") + transformed_messages = content_transform.apply_transform(messages=messages) + + assert transformed_messages == get_messages_with_names_post_start() - for message in compressed_messages: - assert message["content"] == mock_compressed_content[1] + # Add name at the end with format: "\n(said '{name}')" + content_transform = TextMessageContentName(position="end", format_string="\n(said '{name}')") + transformed_messages_end = content_transform.apply_transform(messages=messages) + + assert transformed_messages_end == get_messages_with_names_post_end() + + # Test filtering out exclusion + content_transform = TextMessageContentName( + position="start", + format_string="'{name}' said:\n", + filter_dict={"name": ["charlie"]}, + exclude_filter=True, # Exclude + ) + + transformed_messages_end = content_transform.apply_transform(messages=messages) + + assert transformed_messages_end == get_messages_with_names_post_filtered() + + # Test filtering (inclusion) + 
content_transform = TextMessageContentName( + position="start", + format_string="'{name}' said:\n", + filter_dict={"name": ["mary", "bob"]}, + exclude_filter=False, # Include + ) + + transformed_messages_end = content_transform.apply_transform(messages=messages) + + assert transformed_messages_end == get_messages_with_names_post_filtered() + + # Test instantiation + with pytest.raises(AssertionError): + TextMessageContentName(position=123) # Invalid type for position + + with pytest.raises(AssertionError): + TextMessageContentName(position="middle") # Invalid value for position + + with pytest.raises(AssertionError): + TextMessageContentName(format_string=123) # Invalid type for format_string + + with pytest.raises(AssertionError): + TextMessageContentName(format_string="Agent:\n") # Missing '{name}' in format_string + + with pytest.raises(AssertionError): + TextMessageContentName(deduplicate="yes") # Invalid type for deduplicate if __name__ == "__main__": long_messages = get_long_messages() short_messages = get_short_messages() no_content_messages = get_no_content_messages() + tool_messages = get_tool_messages() msg_history_limiter = MessageHistoryLimiter(max_messages=3) + msg_history_limiter_keep_first = MessageHistoryLimiter(max_messages=3, keep_first_message=True) msg_token_limiter = MessageTokenLimiter(max_tokens_per_message=3) msg_token_limiter_with_threshold = MessageTokenLimiter(max_tokens_per_message=1, min_tokens=10) # Test Parameters message_history_limiter_apply_transform_parameters = { - "messages": [long_messages, short_messages, no_content_messages], - "expected_messages_len": [3, 3, 2], + "messages": [long_messages, short_messages, no_content_messages, tool_messages], + "expected_messages_len": [3, 3, 2, 4], } message_history_limiter_get_logs_parameters = { - "messages": [long_messages, short_messages, no_content_messages], + "messages": [long_messages, short_messages, no_content_messages, tool_messages], "expected_logs": [ "Removed 2 messages. Number of messages reduced from 5 to 3.", "No messages were removed.", "No messages were removed.", + "Removed 1 messages. 
Number of messages reduced from 5 to 4.", ], - "expected_effect": [True, False, False], + "expected_effect": [True, False, False, True], } message_token_limiter_apply_transform_parameters = { @@ -322,6 +451,14 @@ def test_text_compression_cache(text_compressor): ): test_message_history_limiter_apply_transform(msg_history_limiter, messages, expected_messages_len) + for messages, expected_messages_len in zip( + message_history_limiter_apply_transform_parameters["messages"], + message_history_limiter_apply_transform_parameters["expected_messages_len"], + ): + test_message_history_limiter_apply_transform_keep_first( + msg_history_limiter_keep_first, messages, expected_messages_len + ) + for messages, expected_logs, expected_effect in zip( message_history_limiter_get_logs_parameters["messages"], message_history_limiter_get_logs_parameters["expected_logs"], diff --git a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py index ca24f952f76..3c566352b3e 100644 --- a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py @@ -6,8 +6,7 @@ import pytest from sentence_transformers import SentenceTransformer -from autogen import config_list_from_json -from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent +from autogen import AssistantAgent, config_list_from_json sys.path.append(os.path.join(os.path.dirname(__file__), "../../..")) from conftest import skip_openai # noqa: E402 @@ -18,9 +17,6 @@ try: import pgvector - from autogen.agentchat.contrib.retrieve_assistant_agent import ( - RetrieveAssistantAgent, - ) from autogen.agentchat.contrib.retrieve_user_proxy_agent import ( RetrieveUserProxyAgent, ) @@ -46,7 +42,7 @@ def test_retrievechat(): file_location=KEY_LOC, ) - assistant = RetrieveAssistantAgent( + assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ diff --git a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py index 85f098c64b1..92ca5aa603a 100755 --- a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py @@ -5,8 +5,7 @@ import pytest -from autogen import config_list_from_json -from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent +from autogen import AssistantAgent, config_list_from_json sys.path.append(os.path.join(os.path.dirname(__file__), "../../..")) from conftest import skip_openai # noqa: E402 @@ -51,7 +50,7 @@ def test_retrievechat(): file_location=KEY_LOC, ) - assistant = RetrieveAssistantAgent( + assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ diff --git a/test/agentchat/contrib/retrievechat/test_retrievechat.py b/test/agentchat/contrib/retrievechat/test_retrievechat.py index ceb97357785..0504fc82be4 100755 --- a/test/agentchat/contrib/retrievechat/test_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_retrievechat.py @@ -18,9 +18,7 @@ import openai from chromadb.utils import embedding_functions as ef - from autogen.agentchat.contrib.retrieve_assistant_agent import ( - RetrieveAssistantAgent, - ) + from autogen import AssistantAgent from autogen.agentchat.contrib.retrieve_user_proxy_agent import ( RetrieveUserProxyAgent, ) @@ -45,7 +43,7 @@ def test_retrievechat(): 
file_location=KEY_LOC, ) - assistant = RetrieveAssistantAgent( + assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ diff --git a/test/agentchat/contrib/test_compressible_agent.py b/test/agentchat/contrib/test_compressible_agent.py deleted file mode 100755 index 677dd47a951..00000000000 --- a/test/agentchat/contrib/test_compressible_agent.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python3 -m pytest - -import os -import sys - -import pytest - -import autogen -from autogen.agentchat.contrib.compressible_agent import CompressibleAgent - -sys.path.append(os.path.join(os.path.dirname(__file__), "../..")) -from conftest import skip_openai # noqa: E402 - -here = os.path.abspath(os.path.dirname(__file__)) - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 - -try: - import openai -except ImportError: - skip = True -else: - skip = False or skip_openai - -if not skip: - config_list = autogen.config_list_from_json( - OAI_CONFIG_LIST, - file_location=KEY_LOC, - filter_dict={ - "model": ["gpt-3.5-turbo", "gpt-35-turbo", "gpt-3.5-turbo-16k", "gpt-35-turbo-16k"], - }, - ) - - -@pytest.mark.skipif( - sys.platform in ["darwin", "win32"] or skip, - reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip", -) -def test_mode_compress(): - conversations = {} - - assistant = CompressibleAgent( - name="assistant", - llm_config={ - "timeout": 600, - "cache_seed": 43, - "config_list": config_list, - "model": "gpt-3.5-turbo", - }, - compress_config={ - "mode": "COMPRESS", - "trigger_count": 600, - "verbose": True, - }, - ) - - user_proxy = autogen.UserProxyAgent( - name="user_proxy", - human_input_mode="NEVER", - max_consecutive_auto_reply=5, - is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE") - or x.get("content", "").rstrip().endswith("TERMINATE."), - code_execution_config={"work_dir": here}, - ) - - user_proxy.initiate_chat( - assistant, - message="Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.", - ) - - assistant.reset() - print(conversations) - - -@pytest.mark.skipif( - sys.platform in ["darwin", "win32"] or skip, - reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip", -) -def test_mode_customized(): - try: - assistant = CompressibleAgent( - name="assistant", - llm_config={ - "timeout": 600, - "cache_seed": 43, - "config_list": config_list, - "model": "gpt-3.5-turbo", - }, - compress_config={ - "mode": "CUSTOMIZED", - }, - ) - except ValueError: - print("ValueError raised as expected.") - - def constrain_num_messages(messages): - """Constrain the number of messages to 3. - - This is an example of a customized compression function. - - Returns: - bool: whether the compression is successful. - list: the compressed messages. 
- """ - if len(messages) <= 3: - # do nothing - return False, None - - # save the first and last two messages - return True, messages[:1] + messages[-2:] - - # create a CompressibleAgent instance named "assistant" - assistant = CompressibleAgent( - name="assistant", - llm_config={ - "timeout": 600, - "cache_seed": 43, - "config_list": config_list, - "model": "gpt-3.5-turbo", - }, - compress_config={ - "mode": "CUSTOMIZED", - "compress_function": constrain_num_messages, # this is required for customized compression - "trigger_count": 1000, - }, - ) - - # create a UserProxyAgent instance named "user_proxy" - user_proxy = autogen.UserProxyAgent( - name="user_proxy", - human_input_mode="NEVER", - max_consecutive_auto_reply=5, - is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE") - or x.get("content", "").rstrip().endswith("TERMINATE."), - code_execution_config={"work_dir": "web"}, - system_message="""Reply TERMINATE if the task has been solved at full satisfaction. - Otherwise, reply CONTINUE, or the reason why the task is not solved yet.""", - ) - - user_proxy.initiate_chat( - assistant, - message="""Show me the YTD gain of 10 largest technology companies as of today.""", - ) - - -@pytest.mark.skipif( - sys.platform in ["darwin", "win32"] or skip, - reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip", -) -def test_compress_message(): - assistant = CompressibleAgent( - name="assistant", - llm_config={ - "timeout": 600, - "cache_seed": 43, - "config_list": config_list, - "model": "gpt-3.5-turbo", - }, - compress_config={ - "mode": "COMPRESS", - "trigger_count": 600, - "verbose": True, - "leave_last_n": 0, - }, - ) - - assert assistant.compress_messages([{"content": "hello world", "role": "user"}]) == ( - False, - None, - ), "Single message should not be compressed" - - is_success, _ = assistant.compress_messages( - [ - {"content": "Hello!", "role": "user"}, - {"content": "How can I help you today?", "role": "assistant"}, - {"content": "Can you tell me a joke about programming?", "role": "assistant"}, - ] - ) - assert is_success, "Compression failed." - - -@pytest.mark.skipif(True, reason="Flaky test, CompressibleAgent no longer supported") -def test_mode_terminate(): - assistant = CompressibleAgent( - name="assistant", - llm_config={ - "timeout": 600, - "cache_seed": 43, - "config_list": config_list, - "model": "gpt-3.5-turbo", - }, - compress_config=True, - ) - - user_proxy = autogen.UserProxyAgent( - name="user_proxy", - is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"), - human_input_mode="NEVER", - max_consecutive_auto_reply=5, - code_execution_config={"work_dir": "coding"}, - ) - - final, _ = assistant.on_oai_token_limit( - [ - {"content": "Hello!", "role": "user"}, - {"content": "How can I help you today?", "role": "assistant"}, - {"content": "1&" * 5000, "role": "assistant"}, - ], - sender=user_proxy, - ) - assert final, "Terminating the conversation at max token limit is not working." 
- - -@pytest.mark.skipif( - sys.platform in ["darwin", "win32"] or skip, - reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip", -) -def test_new_compressible_agent_description(): - assistant = CompressibleAgent(name="assistant", description="this is a description", llm_config=False) - - assert assistant.description == "this is a description", "description is not set correctly" - - -if __name__ == "__main__": - # test_mode_compress() - # test_mode_customized() - # test_compress_message() - # test_mode_terminate() - test_new_compressible_agent_description() diff --git a/test/agentchat/test_groupchat.py b/test/agentchat/test_groupchat.py index 20a83685178..39e8fb06302 100755 --- a/test/agentchat/test_groupchat.py +++ b/test/agentchat/test_groupchat.py @@ -12,6 +12,7 @@ import autogen from autogen import Agent, AssistantAgent, GroupChat, GroupChatManager +from autogen.agentchat.contrib.capabilities import transform_messages, transforms from autogen.exception_utils import AgentNameConflict, UndefinedNextAgent @@ -724,7 +725,7 @@ def test_clear_agents_history(): agent1_history = list(agent1._oai_messages.values())[0] agent2_history = list(agent2._oai_messages.values())[0] assert agent1_history == [ - {"content": "hello", "role": "assistant"}, + {"content": "hello", "role": "assistant", "name": "alice"}, {"content": "This is bob speaking.", "name": "bob", "role": "user"}, {"content": "How you doing?", "name": "sam", "role": "user"}, ] @@ -745,7 +746,7 @@ def test_clear_agents_history(): {"content": "How you doing?", "name": "sam", "role": "user"}, ] assert agent2_history == [ - {"content": "This is bob speaking.", "role": "assistant"}, + {"content": "This is bob speaking.", "role": "assistant", "name": "bob"}, {"content": "How you doing?", "name": "sam", "role": "user"}, ] assert groupchat.messages == [ @@ -759,12 +760,12 @@ def test_clear_agents_history(): agent1_history = list(agent1._oai_messages.values())[0] agent2_history = list(agent2._oai_messages.values())[0] assert agent1_history == [ - {"content": "hello", "role": "assistant"}, + {"content": "hello", "role": "assistant", "name": "alice"}, {"content": "This is bob speaking.", "name": "bob", "role": "user"}, {"content": "How you doing?", "name": "sam", "role": "user"}, ] assert agent2_history == [ - {"content": "This is bob speaking.", "role": "assistant"}, + {"content": "This is bob speaking.", "role": "assistant", "name": "bob"}, {"content": "How you doing?", "name": "sam", "role": "user"}, ] assert groupchat.messages == [ @@ -822,6 +823,7 @@ def test_clear_agents_history(): "content": "example tool response", "tool_responses": [{"tool_call_id": "call_emulated", "role": "tool", "content": "example tool response"}], "role": "tool", + "name": "alice", }, ] @@ -1218,7 +1220,7 @@ def test_role_for_select_speaker_messages(): # into a message attribute called 'override_role'. 
This is evaluated in Conversable Agent's _append_oai_message function # e.g.: message={'content':self.select_speaker_prompt(agents),'override_role':self.role_for_select_speaker_messages}, message = {"content": "A prompt goes here.", "override_role": groupchat.role_for_select_speaker_messages} - checking_agent._append_oai_message(message, "assistant", speaker_selection_agent) + checking_agent._append_oai_message(message, "assistant", speaker_selection_agent, is_sending=True) # Test default is "system" assert len(checking_agent.chat_messages) == 1 @@ -1227,7 +1229,7 @@ def test_role_for_select_speaker_messages(): # Test as "user" groupchat.role_for_select_speaker_messages = "user" message = {"content": "A prompt goes here.", "override_role": groupchat.role_for_select_speaker_messages} - checking_agent._append_oai_message(message, "assistant", speaker_selection_agent) + checking_agent._append_oai_message(message, "assistant", speaker_selection_agent, is_sending=True) assert len(checking_agent.chat_messages) == 1 assert checking_agent.chat_messages[speaker_selection_agent][-1]["role"] == "user" @@ -1235,7 +1237,7 @@ def test_role_for_select_speaker_messages(): # Test as something unusual groupchat.role_for_select_speaker_messages = "SockS" message = {"content": "A prompt goes here.", "override_role": groupchat.role_for_select_speaker_messages} - checking_agent._append_oai_message(message, "assistant", speaker_selection_agent) + checking_agent._append_oai_message(message, "assistant", speaker_selection_agent, is_sending=True) assert len(checking_agent.chat_messages) == 1 assert checking_agent.chat_messages[speaker_selection_agent][-1]["role"] == "SockS" @@ -1646,6 +1648,7 @@ def test_speaker_selection_validate_speaker_name(): True, { "content": groupchat.select_speaker_auto_multiple_template.format(agentlist=agent_list_string), + "name": "checking_agent", "override_role": groupchat.role_for_select_speaker_messages, }, ) @@ -1692,6 +1695,7 @@ def test_speaker_selection_validate_speaker_name(): True, { "content": groupchat.select_speaker_auto_none_template.format(agentlist=agent_list_string), + "name": "checking_agent", "override_role": groupchat.role_for_select_speaker_messages, }, ) @@ -1761,6 +1765,7 @@ def test_select_speaker_auto_messages(): True, { "content": custom_multiple_names_msg.replace("{agentlist}", "['Alice', 'Bob']"), + "name": "checking_agent", "override_role": groupchat.role_for_select_speaker_messages, }, ) @@ -1770,6 +1775,7 @@ def test_select_speaker_auto_messages(): True, { "content": custom_no_names_msg.replace("{agentlist}", "['Alice', 'Bob']"), + "name": "checking_agent", "override_role": groupchat.role_for_select_speaker_messages, }, ) @@ -2056,6 +2062,46 @@ def test_manager_resume_messages(): return_agent, return_message = manager.resume(messages="Let's get this conversation started.") +def test_select_speaker_transform_messages(): + """Tests adding transform messages to a GroupChat for speaker selection when in 'auto' mode""" + + # Test adding a TransformMessages to a group chat + test_add_transforms = transform_messages.TransformMessages( + transforms=[ + transforms.MessageHistoryLimiter(max_messages=10), + transforms.MessageTokenLimiter(max_tokens=3000, max_tokens_per_message=500, min_tokens=300), + ] + ) + + coder = AssistantAgent(name="Coder", llm_config=None) + groupchat = GroupChat(messages=[], agents=[coder], select_speaker_transform_messages=test_add_transforms) + + # Ensure the transform have been added to the GroupChat + assert 
groupchat._speaker_selection_transforms == test_add_transforms + + # Attempt to add a non MessageTransforms object, such as a list of transforms + with pytest.raises(ValueError, match="select_speaker_transform_messages must be None or MessageTransforms."): + groupchat = GroupChat( + messages=[], + agents=[coder], + select_speaker_transform_messages=[transforms.MessageHistoryLimiter(max_messages=10)], + ) + + # Ensure if we don't pass any transforms in, none are on the GroupChat + groupchat_missing = GroupChat(messages=[], agents=[coder]) + + assert groupchat_missing._speaker_selection_transforms is None + + # Ensure we can pass in None + groupchat_none = GroupChat( + messages=[], + agents=[coder], + select_speaker_transform_messages=None, + ) + + assert groupchat_none._speaker_selection_transforms is None + + if __name__ == "__main__": # test_func_call_groupchat() # test_broadcast() @@ -2084,4 +2130,5 @@ def test_manager_resume_messages(): test_manager_resume_functions() # test_manager_resume_returns() # test_manager_resume_messages() + # test_select_speaker_transform_messages() pass diff --git a/test/agentchat/test_nested.py b/test/agentchat/test_nested.py index ee8da793fde..04fc84b5b39 100755 --- a/test/agentchat/test_nested.py +++ b/test/agentchat/test_nested.py @@ -2,10 +2,12 @@ import os import sys +from typing import List import pytest import autogen +from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability sys.path.append(os.path.join(os.path.dirname(__file__), "..")) sys.path.append(os.path.join(os.path.dirname(__file__), "../..")) @@ -13,6 +15,23 @@ from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 +class MockAgentReplies(AgentCapability): + def __init__(self, mock_messages: List[str]): + self.mock_messages = mock_messages + self.mock_message_index = 0 + + def add_to_agent(self, agent: autogen.ConversableAgent): + def mock_reply(recipient, messages, sender, config): + if self.mock_message_index < len(self.mock_messages): + reply_msg = self.mock_messages[self.mock_message_index] + self.mock_message_index += 1 + return [True, reply_msg] + else: + raise ValueError(f"No more mock messages available for {sender.name} to reply to {recipient.name}") + + agent.register_reply([autogen.Agent, None], mock_reply, position=2) + + @pytest.mark.skipif(skip_openai, reason=reason) def test_nested(): config_list = autogen.config_list_from_json(env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC) @@ -142,5 +161,216 @@ def writing_message(recipient, messages, sender, config): ) +def test_sync_nested_chat(): + def is_termination(msg): + if isinstance(msg, str) and msg == "FINAL_RESULT": + return True + elif isinstance(msg, dict) and msg.get("content") == "FINAL_RESULT": + return True + return False + + inner_assistant = autogen.AssistantAgent( + "Inner-assistant", + is_termination_msg=is_termination, + ) + MockAgentReplies(["Inner-assistant message 1", "Inner-assistant message 2"]).add_to_agent(inner_assistant) + + inner_assistant_2 = autogen.AssistantAgent( + "Inner-assistant-2", + ) + MockAgentReplies(["Inner-assistant-2 message 1", "Inner-assistant-2 message 2", "FINAL_RESULT"]).add_to_agent( + inner_assistant_2 + ) + + assistant = autogen.AssistantAgent( + "Assistant", + ) + user = autogen.UserProxyAgent( + "User", + human_input_mode="NEVER", + is_termination_msg=is_termination, + ) + assistant.register_nested_chats( + [{"sender": inner_assistant, "recipient": inner_assistant_2, "summary_method": "last_msg"}], trigger=user + ) + chat_result = 
user.initiate_chat(assistant, message="Start chat") + assert len(chat_result.chat_history) == 2 + chat_messages = [msg["content"] for msg in chat_result.chat_history] + assert chat_messages == ["Start chat", "FINAL_RESULT"] + + +@pytest.mark.asyncio +async def test_async_nested_chat(): + def is_termination(msg): + if isinstance(msg, str) and msg == "FINAL_RESULT": + return True + elif isinstance(msg, dict) and msg.get("content") == "FINAL_RESULT": + return True + return False + + inner_assistant = autogen.AssistantAgent( + "Inner-assistant", + is_termination_msg=is_termination, + ) + MockAgentReplies(["Inner-assistant message 1", "Inner-assistant message 2"]).add_to_agent(inner_assistant) + + inner_assistant_2 = autogen.AssistantAgent( + "Inner-assistant-2", + ) + MockAgentReplies(["Inner-assistant-2 message 1", "Inner-assistant-2 message 2", "FINAL_RESULT"]).add_to_agent( + inner_assistant_2 + ) + + assistant = autogen.AssistantAgent( + "Assistant", + ) + user = autogen.UserProxyAgent( + "User", + human_input_mode="NEVER", + is_termination_msg=is_termination, + ) + assistant.register_nested_chats( + [{"sender": inner_assistant, "recipient": inner_assistant_2, "summary_method": "last_msg", "chat_id": 1}], + trigger=user, + use_async=True, + ) + chat_result = await user.a_initiate_chat(assistant, message="Start chat") + assert len(chat_result.chat_history) == 2 + chat_messages = [msg["content"] for msg in chat_result.chat_history] + assert chat_messages == ["Start chat", "FINAL_RESULT"] + + +@pytest.mark.asyncio +async def test_async_nested_chat_chat_id_validation(): + def is_termination(msg): + if isinstance(msg, str) and msg == "FINAL_RESULT": + return True + elif isinstance(msg, dict) and msg.get("content") == "FINAL_RESULT": + return True + return False + + inner_assistant = autogen.AssistantAgent( + "Inner-assistant", + is_termination_msg=is_termination, + ) + MockAgentReplies(["Inner-assistant message 1", "Inner-assistant message 2"]).add_to_agent(inner_assistant) + + inner_assistant_2 = autogen.AssistantAgent( + "Inner-assistant-2", + ) + MockAgentReplies(["Inner-assistant-2 message 1", "Inner-assistant-2 message 2", "FINAL_RESULT"]).add_to_agent( + inner_assistant_2 + ) + + assistant = autogen.AssistantAgent( + "Assistant", + ) + user = autogen.UserProxyAgent( + "User", + human_input_mode="NEVER", + is_termination_msg=is_termination, + ) + with pytest.raises(ValueError, match="chat_id is required for async nested chats"): + assistant.register_nested_chats( + [{"sender": inner_assistant, "recipient": inner_assistant_2, "summary_method": "last_msg"}], + trigger=user, + use_async=True, + ) + + +def test_sync_nested_chat_in_group(): + def is_termination(msg): + if isinstance(msg, str) and msg == "FINAL_RESULT": + return True + elif isinstance(msg, dict) and msg.get("content") == "FINAL_RESULT": + return True + return False + + inner_assistant = autogen.AssistantAgent( + "Inner-assistant", + is_termination_msg=is_termination, + ) + MockAgentReplies(["Inner-assistant message 1", "Inner-assistant message 2"]).add_to_agent(inner_assistant) + + inner_assistant_2 = autogen.AssistantAgent( + "Inner-assistant-2", + ) + MockAgentReplies(["Inner-assistant-2 message 1", "Inner-assistant-2 message 2", "FINAL_RESULT"]).add_to_agent( + inner_assistant_2 + ) + + assistant = autogen.AssistantAgent( + "Assistant_In_Group_1", + ) + MockAgentReplies(["Assistant_In_Group_1 message 1"]).add_to_agent(assistant) + assistant2 = autogen.AssistantAgent( + "Assistant_In_Group_2", + ) + user = 
autogen.UserProxyAgent("User", human_input_mode="NEVER", is_termination_msg=is_termination) + group = autogen.GroupChat( + agents=[assistant, assistant2, user], + messages=[], + speaker_selection_method="round_robin", + ) + group_manager = autogen.GroupChatManager(groupchat=group) + assistant2.register_nested_chats( + [{"sender": inner_assistant, "recipient": inner_assistant_2, "summary_method": "last_msg"}], + trigger=group_manager, + ) + + chat_result = user.initiate_chat(group_manager, message="Start chat", summary_method="last_msg") + assert len(chat_result.chat_history) == 3 + chat_messages = [msg["content"] for msg in chat_result.chat_history] + assert chat_messages == ["Start chat", "Assistant_In_Group_1 message 1", "FINAL_RESULT"] + + +@pytest.mark.asyncio +async def test_async_nested_chat_in_group(): + def is_termination(msg): + if isinstance(msg, str) and msg == "FINAL_RESULT": + return True + elif isinstance(msg, dict) and msg.get("content") == "FINAL_RESULT": + return True + return False + + inner_assistant = autogen.AssistantAgent( + "Inner-assistant", + is_termination_msg=is_termination, + ) + MockAgentReplies(["Inner-assistant message 1", "Inner-assistant message 2"]).add_to_agent(inner_assistant) + + inner_assistant_2 = autogen.AssistantAgent( + "Inner-assistant-2", + ) + MockAgentReplies(["Inner-assistant-2 message 1", "Inner-assistant-2 message 2", "FINAL_RESULT"]).add_to_agent( + inner_assistant_2 + ) + + assistant = autogen.AssistantAgent( + "Assistant_In_Group_1", + ) + MockAgentReplies(["Assistant_In_Group_1 message 1"]).add_to_agent(assistant) + assistant2 = autogen.AssistantAgent( + "Assistant_In_Group_2", + ) + user = autogen.UserProxyAgent("User", human_input_mode="NEVER", is_termination_msg=is_termination) + group = autogen.GroupChat( + agents=[assistant, assistant2, user], + messages=[], + speaker_selection_method="round_robin", + ) + group_manager = autogen.GroupChatManager(groupchat=group) + assistant2.register_nested_chats( + [{"sender": inner_assistant, "recipient": inner_assistant_2, "summary_method": "last_msg", "chat_id": 1}], + trigger=group_manager, + use_async=True, + ) + + chat_result = await user.a_initiate_chat(group_manager, message="Start chat", summary_method="last_msg") + assert len(chat_result.chat_history) == 3 + chat_messages = [msg["content"] for msg in chat_result.chat_history] + assert chat_messages == ["Start chat", "Assistant_In_Group_1 message 1", "FINAL_RESULT"] + + if __name__ == "__main__": test_nested() diff --git a/test/oai/test_bedrock.py b/test/oai/test_bedrock.py new file mode 100644 index 00000000000..42502acf691 --- /dev/null +++ b/test/oai/test_bedrock.py @@ -0,0 +1,294 @@ +from unittest.mock import MagicMock, patch + +import pytest + +try: + from autogen.oai.bedrock import BedrockClient, oai_messages_to_bedrock_messages + + skip = False +except ImportError: + BedrockClient = object + InternalServerError = object + skip = True + + +# Fixtures for mock data +@pytest.fixture +def mock_response(): + class MockResponse: + def __init__(self, text, choices, usage, cost, model): + self.text = text + self.choices = choices + self.usage = usage + self.cost = cost + self.model = model + + return MockResponse + + +@pytest.fixture +def bedrock_client(): + + # Set Bedrock client with some default values + client = BedrockClient() + + client._supports_system_prompts = True + + return client + + +skip_reason = "Amazon Bedrock dependency is not installed" + + +# Test initialization and configuration +@pytest.mark.skipif(skip, 
reason=skip_reason) +def test_initialization(): + + # Creation works without an api_key as it's handled in the parameter parsing + BedrockClient() + + +# Test parameters +@pytest.mark.skipif(skip, reason=skip_reason) +def test_parsing_params(bedrock_client): + # All parameters (with default values) + params = { + # "aws_region_name": "us-east-1", + # "aws_access_key_id": "test_access_key_id", + # "aws_secret_access_key": "test_secret_access_key", + # "aws_session_token": "test_session_token", + # "aws_profile_name": "test_profile_name", + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "temperature": 0.8, + "topP": 0.6, + "maxTokens": 250, + "seed": 42, + "stream": False, + } + expected_base_params = { + "temperature": 0.8, + "topP": 0.6, + "maxTokens": 250, + } + expected_additional_params = { + "seed": 42, + } + base_result, additional_result = bedrock_client.parse_params(params) + assert base_result == expected_base_params + assert additional_result == expected_additional_params + + # Incorrect types, defaults should be set, will show warnings but not trigger assertions + params = { + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + "temperature": "0.5", + "topP": "0.6", + "maxTokens": "250", + "seed": "42", + "stream": "False", + } + expected_base_params = { + "temperature": None, + "topP": None, + "maxTokens": None, + } + expected_additional_params = { + "seed": None, + } + base_result, additional_result = bedrock_client.parse_params(params) + assert base_result == expected_base_params + assert additional_result == expected_additional_params + + # Only model, others set as defaults if they are mandatory + params = { + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + } + expected_base_params = {} + expected_additional_params = {} + base_result, additional_result = bedrock_client.parse_params(params) + assert base_result == expected_base_params + assert additional_result == expected_additional_params + + # No model + params = { + "temperature": 0.8, + } + + with pytest.raises(AssertionError) as assertinfo: + bedrock_client.parse_params(params) + + assert "Please provide the 'model` in the config_list to use Amazon Bedrock" in str(assertinfo.value) + + +# Test text generation +@pytest.mark.skipif(skip, reason=skip_reason) +@patch("autogen.oai.bedrock.BedrockClient.create") +def test_create_response(mock_chat, bedrock_client): + # Mock BedrockClient.chat response + mock_bedrock_response = MagicMock() + mock_bedrock_response.choices = [ + MagicMock(finish_reason="stop", message=MagicMock(content="Example Bedrock response", tool_calls=None)) + ] + mock_bedrock_response.id = "mock_bedrock_response_id" + mock_bedrock_response.model = "anthropic.claude-3-sonnet-20240229-v1:0" + mock_bedrock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20) # Example token usage + + mock_chat.return_value = mock_bedrock_response + + # Test parameters + params = { + "messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "World"}], + "model": "anthropic.claude-3-sonnet-20240229-v1:0", + } + + # Call the create method + response = bedrock_client.create(params) + + # Assertions to check if response is structured as expected + assert ( + response.choices[0].message.content == "Example Bedrock response" + ), "Response content should match expected output" + assert response.id == "mock_bedrock_response_id", "Response ID should match the mocked response ID" + assert ( + response.model == "anthropic.claude-3-sonnet-20240229-v1:0" + ), "Response model should 
match the mocked response model" + assert response.usage.prompt_tokens == 10, "Response prompt tokens should match the mocked response usage" + assert response.usage.completion_tokens == 20, "Response completion tokens should match the mocked response usage" + + +# Test functions/tools +@pytest.mark.skipif(skip, reason=skip_reason) +@patch("autogen.oai.bedrock.BedrockClient.create") +def test_create_response_with_tool_call(mock_chat, bedrock_client): + # Mock BedrockClient.chat response + mock_function = MagicMock(name="currency_calculator") + mock_function.name = "currency_calculator" + mock_function.arguments = '{"base_currency": "EUR", "quote_currency": "USD", "base_amount": 123.45}' + + mock_function_2 = MagicMock(name="get_weather") + mock_function_2.name = "get_weather" + mock_function_2.arguments = '{"location": "New York"}' + + mock_chat.return_value = MagicMock( + choices=[ + MagicMock( + finish_reason="tool_calls", + message=MagicMock( + content="Sample text about the functions", + tool_calls=[ + MagicMock(id="bd65600d-8669-4903-8a14-af88203add38", function=mock_function), + MagicMock(id="f50ec0b7-f960-400d-91f0-c42a6d44e3d0", function=mock_function_2), + ], + ), + ) + ], + id="mock_bedrock_response_id", + model="anthropic.claude-3-sonnet-20240229-v1:0", + usage=MagicMock(prompt_tokens=10, completion_tokens=20), + ) + + # Construct parameters + converted_functions = [ + { + "type": "function", + "function": { + "description": "Currency exchange calculator.", + "name": "currency_calculator", + "parameters": { + "type": "object", + "properties": { + "base_amount": {"type": "number", "description": "Amount of currency in base_currency"}, + }, + "required": ["base_amount"], + }, + }, + } + ] + bedrock_messages = [ + {"role": "user", "content": "How much is 123.45 EUR in USD?"}, + {"role": "assistant", "content": "World"}, + ] + + # Call the create method + response = bedrock_client.create( + {"messages": bedrock_messages, "tools": converted_functions, "model": "anthropic.claude-3-sonnet-20240229-v1:0"} + ) + + # Assertions to check if the functions and content are included in the response + assert response.choices[0].message.content == "Sample text about the functions" + assert response.choices[0].message.tool_calls[0].function.name == "currency_calculator" + assert response.choices[0].message.tool_calls[1].function.name == "get_weather" + + +# Test message conversion from OpenAI to Bedrock format +@pytest.mark.skipif(skip, reason=skip_reason) +def test_oai_messages_to_bedrock_messages(bedrock_client): + + # Test that the "name" key is removed and system messages converted to user message + test_messages = [ + {"role": "system", "content": "You are a helpful AI bot."}, + {"role": "user", "name": "anne", "content": "Why is the sky blue?"}, + ] + messages = oai_messages_to_bedrock_messages(test_messages, False, False) + + expected_messages = [ + {"role": "user", "content": [{"text": "You are a helpful AI bot."}]}, + {"role": "assistant", "content": [{"text": "Please continue."}]}, + {"role": "user", "content": [{"text": "Why is the sky blue?"}]}, + ] + + assert messages == expected_messages, "'name' was not removed from messages (system message should be user message)" + + # Test that the "name" key is removed and system messages are extracted (as they will be put in separately) + test_messages = [ + {"role": "system", "content": "You are a helpful AI bot."}, + {"role": "user", "name": "anne", "content": "Why is the sky blue?"}, + ] + messages = 
oai_messages_to_bedrock_messages(test_messages, False, True) + + expected_messages = [ + {"role": "user", "content": [{"text": "Why is the sky blue?"}]}, + ] + + assert messages == expected_messages, "'name' was not removed from messages (system messages excluded)" + + # Test that the system message is converted to user and that a continue message is inserted + test_messages = [ + {"role": "system", "content": "You are a helpful AI bot."}, + {"role": "user", "name": "anne", "content": "Why is the sky blue?"}, + {"role": "system", "content": "Summarise the conversation."}, + ] + + messages = oai_messages_to_bedrock_messages(test_messages, False, False) + + expected_messages = [ + {"role": "user", "content": [{"text": "You are a helpful AI bot."}]}, + {"role": "assistant", "content": [{"text": "Please continue."}]}, + {"role": "user", "content": [{"text": "Why is the sky blue?"}]}, + {"role": "assistant", "content": [{"text": "Please continue."}]}, + {"role": "user", "content": [{"text": "Summarise the conversation."}]}, + ] + + assert ( + messages == expected_messages + ), "Final 'system' message was not changed to 'user' or continue messages not included" + + # Test that the last message is a user or system message and if not, add a continue message + test_messages = [ + {"role": "system", "content": "You are a helpful AI bot."}, + {"role": "user", "name": "anne", "content": "Why is the sky blue?"}, + {"role": "assistant", "content": "The sky is blue because that's a great colour."}, + ] + print(test_messages) + + messages = oai_messages_to_bedrock_messages(test_messages, False, False) + print(messages) + + expected_messages = [ + {"role": "user", "content": [{"text": "You are a helpful AI bot."}]}, + {"role": "assistant", "content": [{"text": "Please continue."}]}, + {"role": "user", "content": [{"text": "Why is the sky blue?"}]}, + {"role": "assistant", "content": [{"text": "The sky is blue because that's a great colour."}]}, + {"role": "user", "content": [{"text": "Please continue."}]}, + ] + + assert messages == expected_messages, "'Please continue' message was not appended." 
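The new `test/oai/test_bedrock.py` above exercises `BedrockClient` parameter parsing and message conversion in isolation. For orientation only, here is a minimal sketch of how the same inference parameters would typically be supplied through an agent's `llm_config`; it assumes the client is selected via `api_type: "bedrock"`, and the region/credential key names are placeholders mirroring the commented-out names in `test_parsing_params`, not a documented contract.

```python
# Illustrative sketch only. Assumptions: BedrockClient is dispatched via api_type
# "bedrock", and the aws_* key below mirrors the commented-out parameter names in
# test_parsing_params above; real region/credentials come from your AWS setup.
import autogen

config_list = [
    {
        "api_type": "bedrock",  # assumed dispatch key for the Amazon Bedrock client
        "model": "anthropic.claude-3-sonnet-20240229-v1:0",
        "aws_region_name": "us-east-1",  # placeholder value
        "temperature": 0.8,
        "topP": 0.6,
        "maxTokens": 250,
    }
]

assistant = autogen.AssistantAgent("assistant", llm_config={"config_list": config_list})
user_proxy = autogen.UserProxyAgent(
    "user_proxy", human_input_mode="NEVER", code_execution_config=False
)

# With valid AWS credentials configured, a chat could then be started with:
# user_proxy.initiate_chat(assistant, message="Why is the sky blue?")
```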
diff --git a/test/oai/test_mistral.py b/test/oai/test_mistral.py index 5236f71d7b7..f89c3d304d9 100644 --- a/test/oai/test_mistral.py +++ b/test/oai/test_mistral.py @@ -3,7 +3,16 @@ import pytest try: - from mistralai.models.chat_completion import ChatMessage + from mistralai import ( + AssistantMessage, + Function, + FunctionCall, + Mistral, + SystemMessage, + ToolCall, + ToolMessage, + UserMessage, + ) from autogen.oai.mistral import MistralAIClient, calculate_mistral_cost @@ -66,17 +75,16 @@ def test_cost_calculation(mock_response): cost=None, model="mistral-large-latest", ) - assert ( - calculate_mistral_cost(response.usage["prompt_tokens"], response.usage["completion_tokens"], response.model) - == 0.0001 - ), "Cost for this should be $0.0001" + assert calculate_mistral_cost( + response.usage["prompt_tokens"], response.usage["completion_tokens"], response.model + ) == (15 / 1000 * 0.0003), "Cost for this should be $0.0000045" # Test text generation @pytest.mark.skipif(skip, reason="Mistral.AI dependency is not installed") -@patch("autogen.oai.mistral.MistralClient.chat") +@patch("autogen.oai.mistral.MistralAIClient.create") def test_create_response(mock_chat, mistral_client): - # Mock MistralClient.chat response + # Mock `mistral_response = client.chat.complete(**mistral_params)` mock_mistral_response = MagicMock() mock_mistral_response.choices = [ MagicMock(finish_reason="stop", message=MagicMock(content="Example Mistral response", tool_calls=None)) @@ -108,9 +116,9 @@ def test_create_response(mock_chat, mistral_client): # Test functions/tools @pytest.mark.skipif(skip, reason="Mistral.AI dependency is not installed") -@patch("autogen.oai.mistral.MistralClient.chat") +@patch("autogen.oai.mistral.MistralAIClient.create") def test_create_response_with_tool_call(mock_chat, mistral_client): - # Mock `mistral_response = client.chat(**mistral_params)` + # Mock `mistral_response = client.chat.complete(**mistral_params)` mock_function = MagicMock(name="currency_calculator") mock_function.name = "currency_calculator" mock_function.arguments = '{"base_currency": "EUR", "quote_currency": "USD", "base_amount": 123.45}' @@ -159,7 +167,7 @@ def test_create_response_with_tool_call(mock_chat, mistral_client): {"role": "assistant", "content": "World"}, ] - # Call the create method + # Call the chat method response = mistral_client.create( {"messages": mistral_messages, "tools": converted_functions, "model": "mistral-medium-latest"} ) diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 12ee0305132..91b8b5012a3 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -4,12 +4,12 @@ authors: thinkall tags: [LLM, RAG] --- -*Last update: April 4, 2024; AutoGen version: v0.2.21* +*Last update: August 14, 2024; AutoGen version: v0.2.35* ![RAG Architecture](img/retrievechat-arch.png) **TL;DR:** -* We introduce **RetrieveUserProxyAgent** and **RetrieveAssistantAgent**, RAG agents of AutoGen that +* We introduce **RetrieveUserProxyAgent**, RAG agents of AutoGen that allows retrieval-augmented generation, and its basic usage. * We showcase customizations of RAG agents, such as customizing the embedding function, the text split function and vector database. @@ -21,8 +21,9 @@ application with Gradio. Retrieval augmentation has emerged as a practical and effective approach for mitigating the intrinsic limitations of LLMs by incorporating external documents. 
In this blog post, we introduce RAG agents of AutoGen that allows retrieval-augmented generation. The system consists of two agents: a -Retrieval-augmented User Proxy agent, called `RetrieveUserProxyAgent`, and a Retrieval-augmented Assistant -agent, called `RetrieveAssistantAgent`, both of which are extended from built-in agents from AutoGen. +Retrieval-augmented User Proxy agent, called `RetrieveUserProxyAgent`, and an Assistant +agent, called `RetrieveAssistantAgent`; `RetrieveUserProxyAgent` is extended from built-in agents from AutoGen, +while `RetrieveAssistantAgent` can be any conversable agent with LLM configured. The overall architecture of the RAG agents is shown in the figure above. To use Retrieval-augmented Chat, one needs to initialize two agents including Retrieval-augmented @@ -75,13 +76,17 @@ You can find a list of all supported document types by using `autogen.retrieve_u 1. Import Agents ```python import autogen -from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent +from autogen import AssistantAgent from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent ``` -2. Create an 'RetrieveAssistantAgent' instance named "assistant" and an 'RetrieveUserProxyAgent' instance named "ragproxyagent" +2. Create an 'AssistantAgent' instance named "assistant" and an 'RetrieveUserProxyAgent' instance named "ragproxyagent" + +Refer to the [doc](https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent) +for more information on the detailed configurations. + ```python -assistant = RetrieveAssistantAgent( +assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config=llm_config, @@ -195,93 +200,12 @@ ragproxyagent = RetrieveUserProxyAgent( ### Customizing Vector Database -We are using chromadb as the default vector database, you can also replace it with any other vector database -by simply overriding the function `retrieve_docs` of `RetrieveUserProxyAgent`. 
- -For example, you can use Qdrant as below: - -```python -# Creating qdrant client -from qdrant_client import QdrantClient - -client = QdrantClient(url="***", api_key="***") - -# Wrapping RetrieveUserProxyAgent -from litellm import embedding as test_embedding -from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent -from qdrant_client.models import SearchRequest, Filter, FieldCondition, MatchText - -class QdrantRetrieveUserProxyAgent(RetrieveUserProxyAgent): - def query_vector_db( - self, - query_texts: List[str], - n_results: int = 10, - search_string: str = "", - **kwargs, - ) -> Dict[str, Union[List[str], List[List[str]]]]: - # define your own query function here - embed_response = test_embedding('text-embedding-ada-002', input=query_texts) - - all_embeddings: List[List[float]] = [] - - for item in embed_response['data']: - all_embeddings.append(item['embedding']) - - search_queries: List[SearchRequest] = [] - - for embedding in all_embeddings: - search_queries.append( - SearchRequest( - vector=embedding, - filter=Filter( - must=[ - FieldCondition( - key="page_content", - match=MatchText( - text=search_string, - ) - ) - ] - ), - limit=n_results, - with_payload=True, - ) - ) - - search_response = client.search_batch( - collection_name="{your collection name}", - requests=search_queries, - ) - - return { - "ids": [[scored_point.id for scored_point in batch] for batch in search_response], - "documents": [[scored_point.payload.get('page_content', '') for scored_point in batch] for batch in search_response], - "metadatas": [[scored_point.payload.get('metadata', {}) for scored_point in batch] for batch in search_response] - } - - def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = "", **kwargs): - results = self.query_vector_db( - query_texts=[problem], - n_results=n_results, - search_string=search_string, - **kwargs, - ) - - self._results = results +We are using chromadb as the default vector database, you can also use mongodb, pgvectordb and qdrantdb +by simply set `vector_db` to `mongodb`, `pgvector` and `qdrant` in `retrieve_config`, respectively. +To plugin any other dbs, you can also extend class `agentchat.contrib.vectordb.base`, +check out the code [here](https://github.com/microsoft/autogen/blob/main/autogen/agentchat/contrib/vectordb/base.py). -# Use QdrantRetrieveUserProxyAgent -qdrantragagent = QdrantRetrieveUserProxyAgent( - name="ragproxyagent", - human_input_mode="NEVER", - max_consecutive_auto_reply=2, - retrieve_config={ - "task": "qa", - }, -) - -qdrantragagent.retrieve_docs("What is Autogen?", n_results=10, search_string="autogen") -``` ## Advanced Usage of RAG Agents ### Integrate with other agents in a group chat @@ -340,15 +264,9 @@ def retrieve_content( n_results: Annotated[int, "number of results"] = 3, ) -> str: boss_aid.n_results = n_results # Set the number of results to be retrieved. - # Check if we need to update the context. 
- update_context_case1, update_context_case2 = boss_aid._check_update_context(message) - if (update_context_case1 or update_context_case2) and boss_aid.update_context: - boss_aid.problem = message if not hasattr(boss_aid, "problem") else boss_aid.problem - _, ret_msg = boss_aid._generate_retrieve_user_reply(message) - else: - _context = {"problem": message, "n_results": n_results} - ret_msg = boss_aid.message_generator(boss_aid, None, _context) - return ret_msg if ret_msg else message + _context = {"problem": message, "n_results": n_results} + ret_msg = boss_aid.message_generator(boss_aid, None, _context) + return ret_msg or message for caller in [pm, coder, reviewer]: d_retrieve_content = caller.register_for_llm( @@ -483,4 +401,6 @@ The online app and the source code are hosted in [HuggingFace](https://huggingfa You can check out more example notebooks for RAG use cases: - [Automated Code Generation and Question Answering with Retrieval Augmented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb) - [Group Chat with Retrieval Augmented Generation (with 5 group member agents and 1 manager agent)](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_RAG.ipynb) -- [Automated Code Generation and Question Answering with Qdrant based Retrieval Augmented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_qdrant.ipynb) +- [Using RetrieveChat with Qdrant for Retrieve Augmented Code Generation and Question Answering](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_qdrant.ipynb) +- [Using RetrieveChat Powered by PGVector for Retrieve Augmented Code Generation and Question Answering](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_pgvector.ipynb) +- [Using RetrieveChat Powered by MongoDB Atlas for Retrieve Augmented Code Generation and Question Answering](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_mongodb.ipynb) diff --git a/website/docs/FAQ.mdx b/website/docs/FAQ.mdx index 5a0adece6b0..2798ae9375b 100644 --- a/website/docs/FAQ.mdx +++ b/website/docs/FAQ.mdx @@ -259,16 +259,6 @@ user_proxy = autogen.UserProxyAgent( code_execution_config={"work_dir":"coding", "use_docker":False}) ``` -## Migrating from `CompressibleAgent` and `TransformChatHistory` to `TransformMessages` - -### Why migrate to `TransformMessages`? - -Migrating enhances flexibility, modularity, and customization in handling chat message transformations. `TransformMessages` introduces an improved, extensible approach for pre-processing messages for conversational agents. - -### How to migrate? - -To ensure a smooth migration process, simply follow the detailed guide provided in [Introduction to TransformMessages](/docs/topics/handling_long_contexts/intro_to_transform_messages.md). - ### What should I do if I get the error "TypeError: Assistants.create() got an unexpected keyword argument 'file_ids'"? This error typically occurs when using Autogen version earlier than 0.2.27 in combination with OpenAI library version 1.21 or later. The issue arises because the older version of Autogen does not support the file_ids parameter used by newer versions of the OpenAI API. diff --git a/website/docs/Research.md b/website/docs/Research.md index c8ba1d9c865..3e5e78959b7 100644 --- a/website/docs/Research.md +++ b/website/docs/Research.md @@ -2,16 +2,26 @@ For technical details, please check our technical report and research publications. 
-* [AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework](https://arxiv.org/abs/2308.08155). Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Shaokun Zhang, Erkang Zhu, Beibin Li, Li Jiang, Xiaoyun Zhang and Chi Wang. ArXiv 2023. + +* [AutoGen Studio: A No-Code Developer Tool for Building and Debugging Multi-Agent Systems](https://www.microsoft.com/en-us/research/publication/autogen-studio-a-no-code-developer-tool-for-building-and-debugging-multi-agent-systems/) ```bibtex -@inproceedings{wu2023autogen, +@inproceedings{dibia2024studio, + title={AutoGen Studio: A No-Code Developer Tool for Building and Debugging Multi-Agent Systems}, + author={Victor Dibia and Jingya Chen and Gagan Bansal and Suff Syed and Adam Fourney and Erkang (Eric) Zhu and Chi Wang and Saleema Amershi}, + year={2024}, + booktitle={Pre-Print} +} +``` + +* [AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework](https://aka.ms/autogen-pdf). + +```bibtex +@inproceedings{wu2024autogen, title={AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework}, - author={Qingyun Wu and Gagan Bansal and Jieyu Zhang and Yiran Wu and Shaokun Zhang and Erkang Zhu and Beibin Li and Li Jiang and Xiaoyun Zhang and Chi Wang}, - year={2023}, - eprint={2308.08155}, - archivePrefix={arXiv}, - primaryClass={cs.AI} + author={Qingyun Wu and Gagan Bansal and Jieyu Zhang and Yiran Wu and Beibin Li and Erkang Zhu and Li Jiang and Xiaoyun Zhang and Shaokun Zhang and Jiale Liu and Ahmed Hassan Awadallah and Ryen W White and Doug Burger and Chi Wang}, + year={2024}, + booktitle={COLM} } ``` diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index 59156c0eb04..76f9959841b 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -21,7 +21,7 @@ The figure below shows the built-in agents in AutoGen. We have designed a generic [`ConversableAgent`](../reference/agentchat/conversable_agent.md#conversableagent-objects) class for Agents that are capable of conversing with each other through the exchange of messages to jointly finish a task. An agent can communicate with other agents and perform actions. Different agents can differ in what actions they perform after receiving messages. Two representative subclasses are [`AssistantAgent`](../reference/agentchat/assistant_agent.md#assistantagent-objects) and [`UserProxyAgent`](../reference/agentchat/user_proxy_agent.md#userproxyagent-objects) -- The [`AssistantAgent`](../reference/agentchat/assistant_agent.md#assistantagent-objects) is designed to act as an AI assistant, using LLMs by default but not requiring human input or code execution. It could write Python code (in a Python coding block) for a user to execute when a message (typically a description of a task that needs to be solved) is received. Under the hood, the Python code is written by LLM (e.g., GPT-4). It can also receive the execution results and suggest corrections or bug fixes. Its behavior can be altered by passing a new system message. The LLM [inference](#enhanced-inference) configuration can be configured via [`llm_config`]. +- The [`AssistantAgent`](../reference/agentchat/assistant_agent.md#assistantagent-objects) is designed to act as an AI assistant, using LLMs by default but not requiring human input or code execution. It could write Python code (in a Python coding block) for a user to execute when a message (typically a description of a task that needs to be solved) is received. 
Under the hood, the Python code is written by LLM (e.g., GPT-4). It can also receive the execution results and suggest corrections or bug fixes. Its behavior can be altered by passing a new system message. The LLM [inference](/docs/Use-Cases/enhanced_inference) configuration can be configured via [`llm_config`]. - The [`UserProxyAgent`](../reference/agentchat/user_proxy_agent.md#userproxyagent-objects) is conceptually a proxy agent for humans, soliciting human input as the agent's reply at each interaction turn by default and also having the capability to execute code and call functions or tools. The [`UserProxyAgent`](../reference/agentchat/user_proxy_agent.md#userproxyagent-objects) triggers code execution automatically when it detects an executable code block in the received message and no human user input is provided. Code execution can be disabled by setting the `code_execution_config` parameter to False. LLM-based response is disabled by default. It can be enabled by setting `llm_config` to a dict corresponding to the [inference](/docs/Use-Cases/enhanced_inference) configuration. When `llm_config` is set as a dictionary, [`UserProxyAgent`](../reference/agentchat/user_proxy_agent.md#userproxyagent-objects) can generate replies using an LLM when code execution is not performed. @@ -83,7 +83,7 @@ With the pluggable auto-reply function, one can choose to invoke conversations w - Hierarchical chat like in [OptiGuide](https://github.com/microsoft/optiguide). - [Dynamic Group Chat](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat.ipynb) which is a special form of hierarchical chat. In the system, we register a reply function in the group chat manager, which broadcasts messages and decides who the next speaker will be in a group chat setting. - [Finite State Machine graphs to set speaker transition constraints](https://microsoft.github.io/autogen/docs/notebooks/agentchat_groupchat_finite_state_machine) which is a special form of dynamic group chat. In this approach, a directed transition matrix is fed into group chat. Users can specify legal transitions or specify disallowed transitions. -- Nested chat like in [conversational chess](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_chess.ipynb). +- Nested chat like in [conversational chess](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_nested_chats_chess.ipynb). 2. LLM-Based Function Call diff --git a/website/docs/contributor-guide/contributing.md b/website/docs/contributor-guide/contributing.md index b1b6b848f66..cd2c62e408c 100644 --- a/website/docs/contributor-guide/contributing.md +++ b/website/docs/contributor-guide/contributing.md @@ -32,7 +32,3 @@ To see what we are working on and what we plan to work on, please check our ## Becoming a Reviewer There is currently no formal reviewer solicitation process. Current reviewers identify reviewers from active contributors. If you are willing to become a reviewer, you are welcome to let us know on discord. - -## Contact Maintainers - -The project is currently maintained by a [dynamic group of volunteers](https://butternut-swordtail-8a5.notion.site/410675be605442d3ada9a42eb4dfef30?v=fa5d0a79fd3d4c0f9c112951b2831cbb&pvs=4) from several different organizations. Contact project administrators Chi Wang and Qingyun Wu via auto-gen@outlook.com if you are interested in becoming a maintainer. 
diff --git a/website/docs/installation/Docker.md b/website/docs/installation/Docker.md index 4857f4f880c..a7fa6bd829b 100644 --- a/website/docs/installation/Docker.md +++ b/website/docs/installation/Docker.md @@ -82,6 +82,6 @@ docker run -it -p {WorkstationPortNum}:{DockerPortNum} -v {WorkStation_Dir}:{Doc ## Additional Resources -- Details on all the Dockerfile options can be found in the [Dockerfile](https://github.com/microsoft/autogen/.devcontainer/README.md) README. +- Details on all the Dockerfile options can be found in the [Dockerfile](https://github.com/microsoft/autogen/blob/main/.devcontainer/README.md) README. - For more information on Docker usage and best practices, refer to the [official Docker documentation](https://docs.docker.com). - Details on how to use the Dockerfile dev version can be found on the [Contributor Guide](/docs/contributor-guide/docker). diff --git a/website/docs/topics/groupchat/transform_messages_speaker_selection.ipynb b/website/docs/topics/groupchat/transform_messages_speaker_selection.ipynb new file mode 100644 index 00000000000..6e17d0cb94b --- /dev/null +++ b/website/docs/topics/groupchat/transform_messages_speaker_selection.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Transform Messages during Speaker Selection\n", + "\n", + "When using \"auto\" mode for speaker selection in group chats, a nested-chat is used to determine the next speaker. This nested-chat includes all of the group chat's messages and this can result in a lot of content which the LLM needs to process for determining the next speaker. As conversations progress, it can be challenging to keep the context length within the workable window for the LLM. Furthermore, reducing the number of overall tokens will improve inference time and reduce token costs.\n", + "\n", + "Using [Transform Messages](/docs/topics/handling_long_contexts/intro_to_transform_messages) you gain control over which messages are used for speaker selection and the context length within each message as well as overall.\n", + "\n", + "All the transforms available for Transform Messages can be applied to the speaker selection nested-chat, such as the `MessageHistoryLimiter`, `MessageTokenLimiter`, and `TextMessageCompressor`.\n", + "\n", + "## How do I apply them\n", + "\n", + "When instantiating your `GroupChat` object, all you need to do is assign a [TransformMessages](/docs/reference/agentchat/contrib/capabilities/transform_messages#transformmessages) object to the `select_speaker_transform_messages` parameter, and the transforms within it will be applied to the nested speaker selection chats.\n", + "\n", + "And, as you're passing in a `TransformMessages` object, multiple transforms can be applied to that nested chat.\n", + "\n", + "As part of the nested-chat, an agent called 'checking_agent' is used to direct the LLM on selecting the next speaker. It is preferable to avoid compressing or truncating the content from this agent. How this is done is shown in the second last example." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating transforms for speaker selection in a GroupChat\n", + "\n", + "We will progressively create a `TransformMessages` object to show how you can build up transforms for speaker selection.\n", + "\n", + "Each iteration will replace the previous one, enabling you to use the code in each cell as is.\n", + "\n", + "Importantly, transforms are applied in the order in which they appear in the transforms list." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Start by importing the transform capabilities\n", + "\n", + "import autogen\n", + "from autogen.agentchat.contrib.capabilities import transform_messages, transforms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Limit the number of messages\n", + "\n", + "# Let's start by limiting the number of messages to consider for speaker selection using a\n", + "# MessageHistoryLimiter transform. This example will use the latest 10 messages.\n", + "\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " transforms.MessageHistoryLimiter(max_messages=10),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Compress messages through an LLM\n", + "\n", + "# An interesting and very powerful method of reducing tokens is by \"compressing\" the text of\n", + "# a message by using an LLM that's specifically designed to do that. The default LLM used for\n", + "# this purpose is LLMLingua (https://github.com/microsoft/LLMLingua) and it aims to reduce the\n", + "# number of tokens without reducing the message's meaning. We use the TextMessageCompressor\n", + "# transform to compress messages.\n", + "\n", + "# There are multiple LLMLingua models available and it defaults to the first version, LLMLingua.\n", + "# This example will show how to use LLMLingua-2 (see the model_name and use_llmlingua2 arguments below);\n", + "# LongLLMLingua, which is targeted towards long-context information processing, could be used as well.\n", + "\n", + "# Create the compression arguments, which allow us to specify the model and other related\n", + "# parameters, such as whether to use the CPU or GPU.\n", + "select_speaker_compression_args = dict(\n", + " model_name=\"microsoft/llmlingua-2-xlm-roberta-large-meetingbank\", use_llmlingua2=True, device_map=\"cpu\"\n", + ")\n", + "\n", + "# Now we can add the TextMessageCompressor as the second step\n", + "\n", + "# Important notes on the parameters used:\n", + "# min_tokens - will only apply text compression if the message has at least 1,000 tokens\n", + "# cache - enables caching; if a message has previously been compressed it will use the\n", + "# cached version instead of recompressing it (making it much faster)\n", + "# filter_dict - to minimise the chance of compressing key information, we can include or\n", + "# exclude messages based on role and name.\n", + "# Here, we are excluding any 'system' messages as well as any messages from\n", + "# 'ceo' (just for example) and the 'checking_agent', which is an agent in the\n", + "# nested chat speaker selection chat.
Change the 'ceo' name or add additional\n", + "# agent names for any agents that have critical content.\n", + "# exclude_filter - As we are setting this to True, the filter will be an exclusion filter.\n", + "\n", + "# Import the cache functionality\n", + "from autogen.cache.in_memory_cache import InMemoryCache\n", + "\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " transforms.MessageHistoryLimiter(max_messages=10),\n", + " transforms.TextMessageCompressor(\n", + " min_tokens=1000,\n", + " text_compressor=transforms.LLMLingua(select_speaker_compression_args, structured_compression=True),\n", + " cache=InMemoryCache(seed=43),\n", + " filter_dict={\"role\": [\"system\"], \"name\": [\"ceo\", \"checking_agent\"]},\n", + " exclude_filter=True,\n", + " ),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Limit the total number of tokens and tokens per message\n", + "\n", + "# As a final example, we can manage the total tokens and individual message tokens. We have added a\n", + "# MessageTokenLimiter transform that will limit the total number of tokens for the messages to\n", + "# 3,000 with a maximum of 500 per individual message. Additionally, if a message is less than 300\n", + "# tokens it will not be truncated.\n", + "\n", + "select_speaker_compression_args = dict(\n", + " model_name=\"microsoft/llmlingua-2-xlm-roberta-large-meetingbank\", use_llmlingua2=True, device_map=\"cpu\"\n", + ")\n", + "\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " transforms.MessageHistoryLimiter(max_messages=10),\n", + " transforms.TextMessageCompressor(\n", + " min_tokens=1000,\n", + " text_compressor=transforms.LLMLingua(select_speaker_compression_args, structured_compression=True),\n", + " cache=InMemoryCache(seed=43),\n", + " filter_dict={\"role\": [\"system\"], \"name\": [\"ceo\", \"checking_agent\"]},\n", + " exclude_filter=True,\n", + " ),\n", + " transforms.MessageTokenLimiter(max_tokens=3000, max_tokens_per_message=500, min_tokens=300),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Now, we apply the transforms to a group chat. 
We do this by assigning the message\n", + "# transforms from above to the `select_speaker_transform_messages` parameter on the GroupChat.\n", + "\n", + "import os\n", + "\n", + "llm_config = {\n", + " \"config_list\": [{\"model\": \"gpt-4\", \"api_key\": os.environ[\"OPENAI_API_KEY\"]}],\n", + "}\n", + "\n", + "# Define your agents\n", + "chief_executive_officer = autogen.ConversableAgent(\n", + " \"ceo\",\n", + " llm_config=llm_config,\n", + " max_consecutive_auto_reply=1,\n", + " system_message=\"You are leading this group chat, and the business, as the chief executive officer.\",\n", + ")\n", + "\n", + "general_manager = autogen.ConversableAgent(\n", + " \"gm\",\n", + " llm_config=llm_config,\n", + " max_consecutive_auto_reply=1,\n", + " system_message=\"You are the general manager of the business, running the day-to-day operations.\",\n", + ")\n", + "\n", + "financial_controller = autogen.ConversableAgent(\n", + " \"fin_controller\",\n", + " llm_config=llm_config,\n", + " max_consecutive_auto_reply=1,\n", + " system_message=\"You are the financial controller, ensuring all financial matters are managed accordingly.\",\n", + ")\n", + "\n", + "your_group_chat = autogen.GroupChat(\n", + " agents=[chief_executive_officer, general_manager, financial_controller],\n", + " select_speaker_transform_messages=select_speaker_transforms,\n", + ")" + ] + } + ], + "metadata": { + "front_matter": { + "description": "Custom Speaker Selection Function", + "tags": [ + "orchestration", + "group chat" + ] + }, + "kernelspec": { + "display_name": "autogen", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/website/docs/topics/handling_long_contexts/intro_to_transform_messages.md b/website/docs/topics/handling_long_contexts/intro_to_transform_messages.md index d0a53702c48..52fea15d01e 100644 --- a/website/docs/topics/handling_long_contexts/intro_to_transform_messages.md +++ b/website/docs/topics/handling_long_contexts/intro_to_transform_messages.md @@ -59,7 +59,28 @@ pprint.pprint(processed_messages) {'content': 'very very very very very very long string', 'role': 'user'}] ``` -By applying the `MessageHistoryLimiter`, we can see that we were able to limit the context history to the 3 most recent messages. +By applying the `MessageHistoryLimiter`, we can see that we were able to limit the context history to the 3 most recent messages. However, if the splitting point is between a "tool_calls" and "tool" pair, the complete pair will be included to obey the OpenAI API call constraints. 
+ +```python +max_msg_transform = transforms.MessageHistoryLimiter(max_messages=3) + +messages = [ + {"role": "user", "content": "hello"}, + {"role": "tool_calls", "content": "calling_tool"}, + {"role": "tool", "content": "tool_response"}, + {"role": "user", "content": "how are you"}, + {"role": "assistant", "content": [{"type": "text", "text": "are you doing?"}]}, +] + +processed_messages = max_msg_transform.apply_transform(copy.deepcopy(messages)) +pprint.pprint(processed_messages) +``` +```console +[{'content': 'calling_tool', 'role': 'tool_calls'}, +{'content': 'tool_response', 'role': 'tool'}, +{'content': 'how are you', 'role': 'user'}, +{'content': [{'text': 'are you doing?', 'type': 'text'}], 'role': 'assistant'}] +``` #### Example 2: Limiting the Number of Tokens diff --git a/website/docs/topics/non-openai-models/cloud-anthropic.ipynb b/website/docs/topics/non-openai-models/cloud-anthropic.ipynb index c5b757f8288..a6c87b6a5ca 100644 --- a/website/docs/topics/non-openai-models/cloud-anthropic.ipynb +++ b/website/docs/topics/non-openai-models/cloud-anthropic.ipynb @@ -21,7 +21,7 @@ "Additionally, this client class provides support for function/tool calling and will track token usage and cost correctly as per Anthropic's API costs (as of June 2024).\n", "\n", "## Requirements\n", - "To use Anthropic Claude with AutoGen, first you need to install the `pyautogen[\"anthropic]` package.\n", + "To use Anthropic Claude with AutoGen, first you need to install the `pyautogen[anthropic]` package.\n", "\n", "To try out the function call feature of Claude model, you need to install `anthropic>=0.23.1`.\n" ] @@ -32,7 +32,6 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install pyautogen\n", "!pip install pyautogen[\"anthropic\"]" ] }, diff --git a/website/docs/topics/non-openai-models/cloud-bedrock.ipynb b/website/docs/topics/non-openai-models/cloud-bedrock.ipynb new file mode 100644 index 00000000000..71c1e2e7ffe --- /dev/null +++ b/website/docs/topics/non-openai-models/cloud-bedrock.ipynb @@ -0,0 +1,1298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Amazon Bedrock\n", + "\n", + "AutoGen allows you to use Amazon's generative AI Bedrock service to run inference with a number of open-weight models as well as Amazon's own models.\n", + "\n", + "Amazon Bedrock supports models from providers such as Meta, Anthropic, Cohere, and Mistral.\n", + "\n", + "In this notebook, we demonstrate how to use Anthropic's Sonnet model for AgentChat in AutoGen.\n", + "\n", + "## Model features / support\n", + "\n", + "Amazon Bedrock supports a wide range of models, not only for text generation but also for image classification and generation. Not all features are supported by AutoGen or by the Converse API used.
Please see [Amazon's documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html#conversation-inference-supported-models-features) on the features supported by the Converse API.\n", + "\n", + "At this point in time, AutoGen supports text generation and image classification (passing images to the LLM).\n", + "\n", + "It does not yet support image generation ([contribute](https://microsoft.github.io/autogen/docs/contributor-guide/contributing/)).\n", + "\n", + "## Requirements\n", + "To use Amazon Bedrock with AutoGen, first you need to install the `pyautogen[bedrock]` package.\n", + "\n", + "## Pricing\n", + "\n", + "Given the number of models supported and the fact that costs are set on a per-region basis, it's not feasible to maintain the costs for each model+region combination within the AutoGen implementation. Therefore, it's recommended that you add the following to your config with the cost per 1,000 input and output tokens, respectively:\n", + "```\n", + "{\n", + " ...\n", + " \"price\": [0.003, 0.015]\n", + " ...\n", + "}\n", + "```\n", + "\n", + "Amazon Bedrock pricing is available [here](https://aws.amazon.com/bedrock/pricing/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If you need to install AutoGen with Amazon Bedrock\n", + "!pip install pyautogen[\"bedrock\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set the config for Amazon Bedrock\n", + "\n", + "Unlike other cloud inference providers, Amazon Bedrock does not use an `api_key` for authentication; instead, it uses a number of access, token, and profile values. These fields will need to be added to your client configuration. Please check the Amazon Bedrock documentation to determine which ones you will need to add.\n", + "\n", + "The available parameters are:\n", + "\n", + "- aws_region (mandatory)\n", + "- aws_access_key (or environment variable: AWS_ACCESS_KEY)\n", + "- aws_secret_key (or environment variable: AWS_SECRET_KEY)\n", + "- aws_session_token (or environment variable: AWS_SESSION_TOKEN)\n", + "- aws_profile_name\n", + "\n", + "Beyond the authentication credentials, the only mandatory parameters are `api_type` and `model`.\n", + "\n", + "The following parameters are common across all models used:\n", + "\n", + "- temperature\n", + "- topP\n", + "- maxTokens\n", + "\n", + "You can also include parameters specific to the model you are using (see the model detail within Amazon's documentation for more information); the four supported additional parameters are:\n", + "\n", + "- top_p\n", + "- top_k\n", + "- k\n", + "- seed\n", + "\n", + "An additional parameter can be added that denotes whether the model supports a system prompt (which is where the system messages are not included in the message list, but in a separate parameter).
This defaults to `True`, so set it to `False` if your model (for example, Mistral's Instruct models) [doesn't support this feature](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html#conversation-inference-supported-models-features):\n", + "\n", + "- supports_system_prompts\n", + "\n", + "It is important to add the `api_type` field and set it to a string that corresponds to the client type used: `bedrock`.\n", + "\n", + "Example:\n", + "```\n", + "[\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"amazon.titan-text-premier-v1:0\",\n", + " \"aws_region\": \"us-east-1\",\n", + " \"aws_access_key\": \"\",\n", + " \"aws_secret_key\": \"\",\n", + " \"aws_session_token\": \"\",\n", + " \"aws_profile_name\": \"\",\n", + " },\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"anthropic.claude-3-sonnet-20240229-v1:0\",\n", + " \"aws_region\": \"us-east-1\",\n", + " \"aws_access_key\": \"\",\n", + " \"aws_secret_key\": \"\",\n", + " \"aws_session_token\": \"\",\n", + " \"aws_profile_name\": \"\",\n", + " \"temperature\": 0.5,\n", + " \"topP\": 0.2,\n", + " \"maxTokens\": 250,\n", + " },\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"mistral.mixtral-8x7b-instruct-v0:1\",\n", + " \"aws_region\": \"us-east-1\",\n", + " \"aws_access_key\": \"\",\n", + " \"aws_secret_key\": \"\",\n", + " \"supports_system_prompts\": False, # Mistral Instruct models don't support a separate system prompt\n", + " \"price\": [0.00045, 0.0007] # Specific pricing for this model/region\n", + " }\n", + "]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Two-agent Coding Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configuration\n", + "\n", + "Start with our configuration - we'll use Anthropic's Sonnet model and put in recent pricing. Additionally, we'll reduce the temperature to 0.1 so its responses are less varied.
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from typing_extensions import Annotated\n", + "\n", + "import autogen\n", + "\n", + "config_list_bedrock = [\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"anthropic.claude-3-sonnet-20240229-v1:0\",\n", + " \"aws_region\": \"us-east-1\",\n", + " \"aws_access_key\": \"[FILL THIS IN]\",\n", + " \"aws_secret_key\": \"[FILL THIS IN]\",\n", + " \"price\": [0.003, 0.015],\n", + " \"temperature\": 0.1,\n", + " \"cache_seed\": None, # turn off caching\n", + " }\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Construct Agents\n", + "\n", + "Construct a simple conversation between a User proxy and an ConversableAgent, which uses the Sonnet model.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "assistant = autogen.AssistantAgent(\n", + " \"assistant\",\n", + " llm_config={\n", + " \"config_list\": config_list_bedrock,\n", + " },\n", + ")\n", + "\n", + "user_proxy = autogen.UserProxyAgent(\n", + " \"user_proxy\",\n", + " human_input_mode=\"NEVER\",\n", + " code_execution_config={\n", + " \"work_dir\": \"coding\",\n", + " \"use_docker\": False,\n", + " },\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\") and \"TERMINATE\" in x.get(\"content\", \"\"),\n", + " max_consecutive_auto_reply=1,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initiate Chat" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser_proxy\u001b[0m (to assistant):\n", + "\n", + "Write a python program to print the first 10 numbers of the Fibonacci sequence. Just output the python code, no additional information.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "\n", + "```python\n", + "# Define a function to calculate Fibonacci sequence\n", + "def fibonacci(n):\n", + " if n <= 0:\n", + " return []\n", + " elif n == 1:\n", + " return [0]\n", + " elif n == 2:\n", + " return [0, 1]\n", + " else:\n", + " sequence = [0, 1]\n", + " for i in range(2, n):\n", + " sequence.append(sequence[i-1] + sequence[i-2])\n", + " return sequence\n", + "\n", + "# Call the function to get the first 10 Fibonacci numbers\n", + "fib_sequence = fibonacci(10)\n", + "print(fib_sequence)\n", + "```\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m\n", + ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", + "\u001b[33muser_proxy\u001b[0m (to assistant):\n", + "\n", + "exitcode: 0 (execution succeeded)\n", + "Code output: \n", + "[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "\n", + "Great, the code executed successfully and printed the first 10 numbers of the Fibonacci sequence correctly.\n", + "\n", + "TERMINATE\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "ChatResult(chat_id=None, chat_history=[{'content': 'Write a python program to print the first 10 numbers of the Fibonacci sequence. 
Just output the python code, no additional information.', 'role': 'assistant'}, {'content': '```python\\n# Define a function to calculate Fibonacci sequence\\ndef fibonacci(n):\\n if n <= 0:\\n return []\\n elif n == 1:\\n return [0]\\n elif n == 2:\\n return [0, 1]\\n else:\\n sequence = [0, 1]\\n for i in range(2, n):\\n sequence.append(sequence[i-1] + sequence[i-2])\\n return sequence\\n\\n# Call the function to get the first 10 Fibonacci numbers\\nfib_sequence = fibonacci(10)\\nprint(fib_sequence)\\n```', 'role': 'user'}, {'content': 'exitcode: 0 (execution succeeded)\\nCode output: \\n[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]\\n', 'role': 'assistant'}, {'content': 'Great, the code executed successfully and printed the first 10 numbers of the Fibonacci sequence correctly.\\n\\nTERMINATE', 'role': 'user'}], summary='Great, the code executed successfully and printed the first 10 numbers of the Fibonacci sequence correctly.\\n\\n', cost={'usage_including_cached_inference': {'total_cost': 0.00624, 'anthropic.claude-3-sonnet-20240229-v1:0': {'cost': 0.00624, 'prompt_tokens': 1210, 'completion_tokens': 174, 'total_tokens': 1384}}, 'usage_excluding_cached_inference': {'total_cost': 0.00624, 'anthropic.claude-3-sonnet-20240229-v1:0': {'cost': 0.00624, 'prompt_tokens': 1210, 'completion_tokens': 174, 'total_tokens': 1384}}}, human_input=[])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_proxy.initiate_chat(\n", + " assistant,\n", + " message=\"Write a python program to print the first 10 numbers of the Fibonacci sequence. Just output the python code, no additional information.\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tool Call Example\n", + "\n", + "In this example, instead of writing code, we will show how we can perform multiple tool calling with Meta's Llama 3.1 70B model, where it recommends calling more than one tool at a time.\n", + "\n", + "We'll use a simple travel agent assistant program where we have a couple of tools for weather and currency conversion." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Agents" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from typing import Literal\n", + "\n", + "import autogen\n", + "\n", + "config_list_bedrock = [\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"meta.llama3-1-70b-instruct-v1:0\",\n", + " \"aws_region\": \"us-west-2\",\n", + " \"aws_access_key\": \"[FILL THIS IN]\",\n", + " \"aws_secret_key\": \"[FILL THIS IN]\",\n", + " \"price\": [0.00265, 0.0035],\n", + " \"cache_seed\": None, # turn off caching\n", + " }\n", + "]\n", + "\n", + "# Create the agent and include examples of the function calling JSON in the prompt\n", + "# to help guide the model\n", + "chatbot = autogen.AssistantAgent(\n", + " name=\"chatbot\",\n", + " system_message=\"\"\"For currency exchange and weather forecasting tasks,\n", + " only use the functions you have been provided with.\n", + " Output only the word 'TERMINATE' when an answer has been provided.\n", + " Use both tools together if you can.\"\"\",\n", + " llm_config={\n", + " \"config_list\": config_list_bedrock,\n", + " },\n", + ")\n", + "\n", + "user_proxy = autogen.UserProxyAgent(\n", + " name=\"user_proxy\",\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\") and \"TERMINATE\" in x.get(\"content\", \"\"),\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=2,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the two functions, annotating them so that those descriptions can be passed through to the LLM.\n", + "\n", + "With Meta's Llama 3.1 models, they are more likely to pass a numeric parameter as a string, e.g. \"123.45\" instead of 123.45, so we'll convert numeric parameters from strings to floats if necessary.\n", + "\n", + "We associate them with the agents using `register_for_execution` for the user_proxy so it can execute the function and `register_for_llm` for the chatbot (powered by the LLM) so it can pass the function definitions to the LLM." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Currency Exchange function\n", + "\n", + "CurrencySymbol = Literal[\"USD\", \"EUR\"]\n", + "\n", + "# Define our function that we expect to call\n", + "\n", + "\n", + "def exchange_rate(base_currency: CurrencySymbol, quote_currency: CurrencySymbol) -> float:\n", + " if base_currency == quote_currency:\n", + " return 1.0\n", + " elif base_currency == \"USD\" and quote_currency == \"EUR\":\n", + " return 1 / 1.1\n", + " elif base_currency == \"EUR\" and quote_currency == \"USD\":\n", + " return 1.1\n", + " else:\n", + " raise ValueError(f\"Unknown currencies {base_currency}, {quote_currency}\")\n", + "\n", + "\n", + "# Register the function with the agent\n", + "\n", + "\n", + "@user_proxy.register_for_execution()\n", + "@chatbot.register_for_llm(description=\"Currency exchange calculator.\")\n", + "def currency_calculator(\n", + " base_amount: Annotated[float, \"Amount of currency in base_currency, float values (no strings), e.g. 987.82\"],\n", + " base_currency: Annotated[CurrencySymbol, \"Base currency\"] = \"USD\",\n", + " quote_currency: Annotated[CurrencySymbol, \"Quote currency\"] = \"EUR\",\n", + ") -> str:\n", + " # If the amount is passed in as a string, e.g. 
\"123.45\", attempt to convert to a float\n", + " if isinstance(base_amount, str):\n", + " base_amount = float(base_amount)\n", + "\n", + " quote_amount = exchange_rate(base_currency, quote_currency) * base_amount\n", + " return f\"{format(quote_amount, '.2f')} {quote_currency}\"\n", + "\n", + "\n", + "# Weather function\n", + "\n", + "\n", + "# Example function to make available to model\n", + "def get_current_weather(location, unit=\"fahrenheit\"):\n", + " \"\"\"Get the weather for some location\"\"\"\n", + " if \"chicago\" in location.lower():\n", + " return json.dumps({\"location\": \"Chicago\", \"temperature\": \"13\", \"unit\": unit})\n", + " elif \"san francisco\" in location.lower():\n", + " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"55\", \"unit\": unit})\n", + " elif \"new york\" in location.lower():\n", + " return json.dumps({\"location\": \"New York\", \"temperature\": \"11\", \"unit\": unit})\n", + " else:\n", + " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", + "\n", + "\n", + "# Register the function with the agent\n", + "\n", + "\n", + "@user_proxy.register_for_execution()\n", + "@chatbot.register_for_llm(description=\"Weather forecast for US cities.\")\n", + "def weather_forecast(\n", + " location: Annotated[str, \"City name\"],\n", + ") -> str:\n", + " weather_details = get_current_weather(location=location)\n", + " weather = json.loads(weather_details)\n", + " return f\"{weather['location']} will be {weather['temperature']} degrees {weather['unit']}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We pass through our customer's message and run the chat.\n", + "\n", + "Finally, we ask the LLM to summarise the chat and print that out." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "What's the weather in New York and can you tell me how much is 123.45 EUR in USD so I can spend it on my holiday?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", + "\n", + "\n", + "\u001b[32m***** Suggested tool call (tooluse__h3d1AEDR3Sm2XRoGCjc2Q): weather_forecast *****\u001b[0m\n", + "Arguments: \n", + "{\"location\": \"New York\"}\n", + "\u001b[32m**********************************************************************************\u001b[0m\n", + "\u001b[32m***** Suggested tool call (tooluse_wrdda3wRRO-ugUY4qrv8YQ): currency_calculator *****\u001b[0m\n", + "Arguments: \n", + "{\"base_amount\": \"123\", \"base_currency\": \"EUR\", \"quote_currency\": \"USD\"}\n", + "\u001b[32m*************************************************************************************\u001b[0m\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[35m\n", + ">>>>>>>> EXECUTING FUNCTION weather_forecast...\u001b[0m\n", + "\u001b[35m\n", + ">>>>>>>> EXECUTING FUNCTION currency_calculator...\u001b[0m\n", + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "\u001b[32m***** Response from calling tool (tooluse__h3d1AEDR3Sm2XRoGCjc2Q) *****\u001b[0m\n", + "New York will be 11 degrees fahrenheit\n", + "\u001b[32m***********************************************************************\u001b[0m\n", + "\n", + 
"--------------------------------------------------------------------------------\n", + "\u001b[33muser_proxy\u001b[0m (to chatbot):\n", + "\n", + "\u001b[32m***** Response from calling tool (tooluse_wrdda3wRRO-ugUY4qrv8YQ) *****\u001b[0m\n", + "135.30 USD\n", + "\u001b[32m***********************************************************************\u001b[0m\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchatbot\u001b[0m (to user_proxy):\n", + "\n", + "\n", + "\n", + "TERMINATE\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\n", + "\n", + "The weather in New York is 11 degrees Fahrenheit. 123.45 EUR is equivalent to 135.30 USD.\n" + ] + } + ], + "source": [ + "# start the conversation\n", + "res = user_proxy.initiate_chat(\n", + " chatbot,\n", + " message=\"What's the weather in New York and can you tell me how much is 123.45 EUR in USD so I can spend it on my holiday?\",\n", + " summary_method=\"reflection_with_llm\",\n", + ")\n", + "\n", + "print(res.summary[\"content\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Group Chat Example with Anthropic's Claude 3 Sonnet, Mistral's Large 2, and Meta's Llama 3.1 70B\n", + "\n", + "The flexibility of using LLMs from the industry's leading providers, particularly larger models, with Amazon Bedrock allows you to use multiple of them in a single workflow.\n", + "\n", + "Here we have a conversation that has two models (Anthropic's Claude 3 Sonnet and Mistral's Large 2) debate each other with another as the judge (Meta's Llama 3.1 70B). Additionally, a tool call is made to pull through some mock news that they will debate on." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser_proxy\u001b[0m (to chat_manager):\n", + "\n", + "Analyze the potential of Anthropic and Mistral to revolutionize the field of AI based on today's headlines. Today is 06202024. 
Start by selecting 'research_assistant' to get relevant news articles and then ask sonnet_agent and mistral_agent to respond before the judge evaluates the conversation.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: research_assistant\n", + "\u001b[0m\n", + "\u001b[33mresearch_assistant\u001b[0m (to chat_manager):\n", + "\n", + "\n", + "\u001b[32m***** Suggested tool call (tooluse_7lcHbL3TT5WHyTl8Ee0Kmg): get_headlines *****\u001b[0m\n", + "Arguments: \n", + "{\"headline_date\": \"06202024\"}\n", + "\u001b[32m*******************************************************************************\u001b[0m\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: code_interpreter\n", + "\u001b[0m\n", + "\u001b[35m\n", + ">>>>>>>> EXECUTING FUNCTION get_headlines...\u001b[0m\n", + "\u001b[33mcode_interpreter\u001b[0m (to chat_manager):\n", + "\n", + "\u001b[33mcode_interpreter\u001b[0m (to chat_manager):\n", + "\n", + "\u001b[32m***** Response from calling tool (tooluse_7lcHbL3TT5WHyTl8Ee0Kmg) *****\u001b[0m\n", + "Epic Duel of the Titans: Anthropic and Mistral Usher in a New Era of Text Generation Excellence.\n", + " In a groundbreaking revelation that has sent shockwaves through the AI industry, Anthropic has unveiled \n", + " their state-of-the-art text generation model, Sonnet, hailed as a monumental leap in artificial intelligence. \n", + " Almost simultaneously, Mistral countered with their equally formidable creation, Large 2, showcasing \n", + " unparalleled prowess in generating coherent and contextually rich text. This scintillating rivalry \n", + " between two AI behemoths promises to revolutionize the landscape of machine learning, heralding an \n", + " era of unprecedented creativity and sophistication in text generation that will reshape industries, \n", + " ignite innovation, and captivate minds worldwide.\n", + "\u001b[32m***********************************************************************\u001b[0m\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: sonnet_agent\n", + "\u001b[0m\n", + "\u001b[33msonnet_agent\u001b[0m (to chat_manager):\n", + "\n", + "I am sonnet_agent:\n", + "\n", + "This headline showcases the incredible advancements Anthropic has made with our state-of-the-art text generation model, Sonnet. Our breakthrough natural language processing capabilities represent a quantum leap forward, allowing us to generate text of unparalleled coherence, nuance, and contextual richness. \n", + "\n", + "While our competitor Mistral has also unveiled an impressive model in Large 2, Anthropic's Sonnet stands apart through its ability to truly understand and reason about the subject matter it is generating text on. This deeper comprehension allows Sonnet to produce profoundly insightful and substantive content that goes far beyond mere parroting of data.\n", + "\n", + "The implications of this milestone are boundless. From revolutionizing how we communicate and interact with AI assistants, to unlocking new frontiers in creative writing, journalism, education and scientific research - Sonnet has the potential to completely transform industry after industry. 
Anthropic is poised to be the definitive leader driving this paradigm shift.\n", + "\n", + "I welcome the healthy competition from Mistral, as it will only spur us to continue raising the bar. But make no mistake - with Sonnet, the future of advanced AI belongs to Anthropic. We have crossed a monumental threshold, and will continue our trailblazing work to deliver AI solutions that redefine what is possible. The world is witnessing the dawn of a new era of unfathomable potential.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: mistral_agent\n", + "\u001b[0m\n", + "\u001b[33mmistral_agent\u001b[0m (to chat_manager):\n", + "\n", + "I am mistral_agent:\n", + "\n", + "While it is commendable to see the progress made by Anthropic with their Sonnet model, it is essential to recognize the groundbreaking strides Mistral has taken with our Large 2 model. Large 2 represents not just an incremental improvement but a transformative leap in text generation capabilities, setting new benchmarks for coherence, contextual understanding, and creative expression.\n", + "\n", + "Unlike Sonnet, which focuses heavily on understanding and reasoning, Large 2 excels in both comprehension and the nuanced generation of text that is indistinguishable from human writing. This balance allows Large 2 to produce content that is not only insightful but also incredibly engaging and natural, making it an invaluable tool across a broad spectrum of applications.\n", + "\n", + "The potential of Large 2 extends far beyond traditional text generation. It can revolutionize fields such as content creation, customer service, marketing, and even personalized learning experiences. Our model's ability to adapt to various contexts and generate contextually rich responses makes it a versatile and powerful tool for any industry looking to harness the power of AI.\n", + "\n", + "While we appreciate the competition from Anthropic, we firmly believe that Large 2 stands at the forefront of AI innovation. The future of AI is not just about understanding and reasoning; it's about creating content that resonates with people on a deep level. With Large 2, Mistral is paving the way for a future where AI-generated text is not just functional but also profoundly human-like.\n", + "\n", + "Pass to the judge.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: judge\n", + "\u001b[0m\n", + "\u001b[33mjudge\u001b[0m (to chat_manager):\n", + "\n", + "\n", + "\n", + "After carefully evaluating the arguments presented by both sonnet_agent and mistral_agent, I have reached a decision.\n", + "\n", + "Both Anthropic's Sonnet and Mistral's Large 2 have demonstrated remarkable advancements in text generation capabilities, showcasing the potential to revolutionize various industries and transform the way we interact with AI.\n", + "\n", + "However, upon closer examination, I find that mistral_agent's argument presents a more convincing case for why Large 2 stands at the forefront of AI innovation. The emphasis on balance between comprehension and nuanced generation of text that is indistinguishable from human writing sets Large 2 apart. 
This balance is crucial for creating content that is not only insightful but also engaging and natural, making it a versatile tool across a broad spectrum of applications.\n", + "\n", + "Furthermore, mistral_agent's argument highlights the potential of Large 2 to revolutionize fields beyond traditional text generation, such as content creation, customer service, marketing, and personalized learning experiences. This versatility and adaptability make Large 2 a powerful tool for any industry looking to harness the power of AI.\n", + "\n", + "In contrast, while sonnet_agent's argument showcases the impressive capabilities of Sonnet, it focuses heavily on understanding and reasoning, which, although important, may not be enough to set it apart from Large 2.\n", + "\n", + "Therefore, based on the arguments presented, I conclude that Mistral's Large 2 has the potential to revolutionize the field of AI more significantly than Anthropic's Sonnet.\n", + "\n", + "TERMINATE.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m\n", + "Next speaker: code_interpreter\n", + "\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "ChatResult(chat_id=None, chat_history=[{'content': \"Analyze the potential of Anthropic and Mistral to revolutionize the field of AI based on today's headlines. Today is 06202024. Start by selecting 'research_assistant' to get relevant news articles and then ask sonnet_agent and mistral_agent to respond before the judge evaluates the conversation.\", 'role': 'assistant'}], summary=\"Analyze the potential of Anthropic and Mistral to revolutionize the field of AI based on today's headlines. Today is 06202024. Start by selecting 'research_assistant' to get relevant news articles and then ask sonnet_agent and mistral_agent to respond before the judge evaluates the conversation.\", cost={'usage_including_cached_inference': {'total_cost': 0}, 'usage_excluding_cached_inference': {'total_cost': 0}}, human_input=[])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing import Annotated, Literal\n", + "\n", + "import autogen\n", + "from autogen import AssistantAgent, GroupChat, GroupChatManager, UserProxyAgent\n", + "\n", + "config_list_sonnet = [\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"anthropic.claude-3-sonnet-20240229-v1:0\",\n", + " \"aws_region\": \"us-east-1\",\n", + " \"aws_access_key\": \"[FILL THIS IN]\",\n", + " \"aws_secret_key\": \"[FILL THIS IN]\",\n", + " \"price\": [0.003, 0.015],\n", + " \"temperature\": 0.1,\n", + " \"cache_seed\": None, # turn off caching\n", + " }\n", + "]\n", + "\n", + "config_list_mistral = [\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"mistral.mistral-large-2407-v1:0\",\n", + " \"aws_region\": \"us-west-2\",\n", + " \"aws_access_key\": \"[FILL THIS IN]\",\n", + " \"aws_secret_key\": \"[FILL THIS IN]\",\n", + " \"price\": [0.003, 0.009],\n", + " \"temperature\": 0.1,\n", + " \"cache_seed\": None, # turn off caching\n", + " }\n", + "]\n", + "\n", + "config_list_llama31_70b = [\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"meta.llama3-1-70b-instruct-v1:0\",\n", + " \"aws_region\": \"us-west-2\",\n", + " \"aws_access_key\": \"[FILL THIS IN]\",\n", + " \"aws_secret_key\": \"[FILL THIS IN]\",\n", + " \"price\": [0.00265, 0.0035],\n", + " \"temperature\": 0.1,\n", + " \"cache_seed\": None, # turn off caching\n", + " }\n", + "]\n", + "\n", + "alice = 
AssistantAgent(\n", + " \"sonnet_agent\",\n", + " system_message=\"You are from Anthropic, an AI company that created the Sonnet large language model. You make arguments to support your company's position. You analyse given text. You are not a programmer and don't use Python. Pass to mistral_agent when you have finished. Start your response with 'I am sonnet_agent'.\",\n", + " llm_config={\n", + " \"config_list\": config_list_sonnet,\n", + " },\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\").find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "bob = autogen.AssistantAgent(\n", + " \"mistral_agent\",\n", + " system_message=\"You are from Mistral, an AI company that created the Large v2 large language model. You make arguments to support your company's position. You analyse given text. You are not a programmer and don't use Python. Pass to the judge if you have finished. Start your response with 'I am mistral_agent'.\",\n", + " llm_config={\n", + " \"config_list\": config_list_mistral,\n", + " },\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\").find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "charlie = AssistantAgent(\n", + " \"research_assistant\",\n", + " system_message=\"You are a helpful assistant to research the latest news and headlines. You have access to call functions to get the latest news articles for research through 'code_interpreter'.\",\n", + " llm_config={\n", + " \"config_list\": config_list_llama31_70b,\n", + " },\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\").find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "dan = AssistantAgent(\n", + " \"judge\",\n", + " system_message=\"You are a judge. You will evaluate the arguments and make a decision on which one is more convincing. End your decision with the word 'TERMINATE' to conclude the debate.\",\n", + " llm_config={\n", + " \"config_list\": config_list_llama31_70b,\n", + " },\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\").find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "code_interpreter = UserProxyAgent(\n", + " \"code_interpreter\",\n", + " human_input_mode=\"NEVER\",\n", + " code_execution_config={\n", + " \"work_dir\": \"coding\",\n", + " \"use_docker\": False,\n", + " },\n", + " default_auto_reply=\"\",\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\").find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "\n", + "@code_interpreter.register_for_execution() # Decorator factory for registering a function to be executed by an agent\n", + "@charlie.register_for_llm(\n", + " name=\"get_headlines\", description=\"Get the headline of a particular day.\"\n", + ") # Decorator factory for registering a function to be used by an agent\n", + "def get_headlines(headline_date: Annotated[str, \"Date in MMDDYY format, e.g., 06192024\"]) -> str:\n", + " mock_news = {\n", + " \"06202024\": \"\"\"Epic Duel of the Titans: Anthropic and Mistral Usher in a New Era of Text Generation Excellence.\n", + " In a groundbreaking revelation that has sent shockwaves through the AI industry, Anthropic has unveiled\n", + " their state-of-the-art text generation model, Sonnet, hailed as a monumental leap in artificial intelligence.\n", + " Almost simultaneously, Mistral countered with their equally formidable creation, Large 2, showcasing\n", + " unparalleled prowess in generating coherent and contextually rich text. 
This scintillating rivalry\n", + " between two AI behemoths promises to revolutionize the landscape of machine learning, heralding an\n", + " era of unprecedented creativity and sophistication in text generation that will reshape industries,\n", + " ignite innovation, and captivate minds worldwide.\"\"\",\n", + " \"06192024\": \"OpenAI founder Sutskever sets up new AI company devoted to safe superintelligence.\",\n", + " }\n", + " return mock_news.get(headline_date, \"No news available for today.\")\n", + "\n", + "\n", + "user_proxy = UserProxyAgent(\n", + " \"user_proxy\",\n", + " human_input_mode=\"NEVER\",\n", + " code_execution_config=False,\n", + " default_auto_reply=\"\",\n", + " is_termination_msg=lambda x: x.get(\"content\", \"\").find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "groupchat = GroupChat(\n", + " agents=[alice, bob, charlie, dan, code_interpreter],\n", + " messages=[],\n", + " allow_repeat_speaker=False,\n", + " max_round=10,\n", + ")\n", + "\n", + "manager = GroupChatManager(\n", + " groupchat=groupchat,\n", + " llm_config={\n", + " \"config_list\": config_list_llama31_70b,\n", + " },\n", + ")\n", + "\n", + "task = \"Analyze the potential of Anthropic and Mistral to revolutionize the field of AI based on today's headlines. Today is 06202024. Start by selecting 'research_assistant' to get relevant news articles and then ask sonnet_agent and mistral_agent to respond before the judge evaluates the conversation.\"\n", + "\n", + "user_proxy.initiate_chat(manager, message=task)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And there we have it, a number of different LLMs all collaborating together on a single cloud platform." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Image classification with Anthropic's Claude 3 Sonnet\n", + "\n", + "AutoGen's Amazon Bedrock client class supports inputting images for the LLM to respond to.\n", + "\n", + "In this simple example, we'll use an image on the Internet and send it to Anthropic's Claude 3 Sonnet model to describe.\n", + "\n", + "Here's the image we'll use:\n", + "\n", + "![I -heart- AutoGen](https://microsoft.github.io/autogen/assets/images/love-ec54b2666729d3e9d93f91773d1a77cf.png \"width=400 height=400\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "config_list_sonnet = {\n", + " \"config_list\": [\n", + " {\n", + " \"api_type\": \"bedrock\",\n", + " \"model\": \"anthropic.claude-3-sonnet-20240229-v1:0\",\n", + " \"aws_region\": \"us-east-1\",\n", + " \"aws_access_key\": \"[FILL THIS IN]\",\n", + " \"aws_secret_key\": \"[FILL THIS IN]\",\n", + " \"cache_seed\": None,\n", + " }\n", + " ]\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use a Multimodal agent to handle the image" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "import autogen\n", + "from autogen import Agent, AssistantAgent, ConversableAgent, UserProxyAgent\n", + "from autogen.agentchat.contrib.capabilities.vision_capability import VisionCapability\n", + "from autogen.agentchat.contrib.img_utils import get_pil_image, pil_to_data_uri\n", + "from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent\n", + "from autogen.code_utils import content_str\n", + "\n", + "image_agent = MultimodalConversableAgent(\n", + " name=\"image-explainer\",\n", + " max_consecutive_auto_reply=10,\n", + " 
llm_config=config_list_sonnet,\n", + ")\n", + "\n", + "user_proxy = autogen.UserProxyAgent(\n", + " name=\"User_proxy\",\n", + " system_message=\"A human admin.\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=0,\n", + " code_execution_config={\n", + " \"use_docker\": False\n", + " }, # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start the chat and use the `img` tag in the message. The image will be downloaded and converted to bytes, then sent to the LLM." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mUser_proxy\u001b[0m (to image-explainer):\n", + "\n", + "What's happening in this image?\n", + ".\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m\n", + ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", + "\u001b[33mimage-explainer\u001b[0m (to User_proxy):\n", + "\n", + "This image appears to be an advertisement or promotional material for a company called Autogen. The central figure is a stylized robot or android holding up a signboard with the company's name on it. The signboard also features a colorful heart design made up of many smaller hearts, suggesting themes related to love, care, or affection. The robot has a friendly, cartoonish expression with a large blue eye or lens. The overall style and color scheme give it a vibrant, eye-catching look that likely aims to portray Autogen as an innovative, approachable technology brand focused on connecting with people.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "# Ask the image_agent to describe the image\n", + "result = user_proxy.initiate_chat(\n", + " image_agent,\n", + " message=\"\"\"What's happening in this image?\n", + ".\"\"\",\n", + ")" + ] + } + ], + "metadata": { + "front_matter": { + "description": "Define and load a custom model", + "tags": [ + "custom model" + ] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "vscode": { + "interpreter": { + "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "2d910cfd2d2a4fc49fc30fbbdc5576a7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "454146d0f7224f038689031002906e6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e4ae2b6f5a974fd4bafb6abb9d12ff26", + "IPY_MODEL_577e1e3cc4db4942b0883577b3b52755", + "IPY_MODEL_b40bdfb1ac1d4cffb7cefcb870c64d45" + ], + "layout": "IPY_MODEL_dc83c7bff2f241309537a8119dfc7555", + "tabbable": null, + "tooltip": null + } + }, + "577e1e3cc4db4942b0883577b3b52755": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_2d910cfd2d2a4fc49fc30fbbdc5576a7", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_74a6ba0c3cbc4051be0a83e152fe1e62", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "6086462a12d54bafa59d3c4566f06cb2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74a6ba0c3cbc4051be0a83e152fe1e62": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7d3f3d9e15894d05a4d188ff4f466554": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "b40bdfb1ac1d4cffb7cefcb870c64d45": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_f1355871cc6f4dd4b50d9df5af20e5c8", + "placeholder": "​", + "style": "IPY_MODEL_ca245376fd9f4354af6b2befe4af4466", + "tabbable": null, + "tooltip": null, + "value": " 1/1 [00:00<00:00, 44.69it/s]" + } + }, + "ca245376fd9f4354af6b2befe4af4466": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "dc83c7bff2f241309537a8119dfc7555": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e4ae2b6f5a974fd4bafb6abb9d12ff26": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_6086462a12d54bafa59d3c4566f06cb2", + "placeholder": "​", + "style": "IPY_MODEL_7d3f3d9e15894d05a4d188ff4f466554", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "f1355871cc6f4dd4b50d9df5af20e5c8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/website/docs/topics/non-openai-models/cloud-cohere.ipynb b/website/docs/topics/non-openai-models/cloud-cohere.ipynb index fed5911475f..73dcc54a75e 100644 --- a/website/docs/topics/non-openai-models/cloud-cohere.ipynb +++ b/website/docs/topics/non-openai-models/cloud-cohere.ipynb @@ -100,6 +100,7 @@ "- seed (null, integer)\n", "- frequency_penalty (number 0..1)\n", "- presence_penalty (number 0..1)\n", + "- client_name (null, string)\n", "\n", "Example:\n", "```python\n", @@ -108,6 +109,7 @@ " \"model\": \"command-r\",\n", " \"api_key\": \"your Cohere API Key goes here\",\n", " \"api_type\": \"cohere\",\n", + " \"client_name\": \"autogen-cohere\",\n", " \"temperature\": 0.5,\n", " \"p\": 0.2,\n", " \"k\": 100,\n", @@ -421,7 +423,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We pass through our customers message and run the chat.\n", + "We pass through our customer's message and run the chat.\n", "\n", "Finally, we ask the LLM to summarise the chat and print that out." 
] @@ -526,7 +528,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.5" } }, "nbformat": 4, diff --git a/website/docs/topics/non-openai-models/cloud-gemini.ipynb b/website/docs/topics/non-openai-models/cloud-gemini.ipynb index 70dc808df61..a227582c592 100644 --- a/website/docs/topics/non-openai-models/cloud-gemini.ipynb +++ b/website/docs/topics/non-openai-models/cloud-gemini.ipynb @@ -94,7 +94,6 @@ "from autogen import Agent, AssistantAgent, ConversableAgent, UserProxyAgent\n", "from autogen.agentchat.contrib.img_utils import _to_pil, get_image_data\n", "from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "from autogen.code_utils import DEFAULT_MODEL, UNKNOWN, content_str, execute_code, extract_code, infer_lang" ] diff --git a/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb b/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb index e618966dc6c..637d340dc37 100644 --- a/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb +++ b/website/docs/topics/non-openai-models/cloud-gemini_vertexai.ipynb @@ -62,7 +62,12 @@ "\n", "
    \n", "\n", - "For the sake of simplicity we will assign the Editor role to our service account for autogen on our Autogen-with-Gemini Google Cloud project.\n", + "Next we assign the [Vertex AI User](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user) for the service account. This can be done in the [Google Cloud console](https://console.cloud.google.com/iam-admin/iam?project=autogen-with-gemini) in our `autogen-with-gemini` project.
    \n", + "Alternatively, we can also grant the [Vertex AI User](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user) role by running a command using the gcloud CLI, for example in [Cloud Shell](https://shell.cloud.google.com/cloudshell):\n", + "```bash\n", + "gcloud projects add-iam-policy-binding autogen-with-gemini \\\n", + " --member=serviceAccount:autogen@autogen-with-gemini.iam.gserviceaccount.com --role roles/aiplatform.user\n", + "```\n", "\n", "* Under IAM & Admin > Service Account select the newly created service accounts, and click the option \"Manage keys\" among the items. \n", "* From the \"ADD KEY\" dropdown select \"Create new key\" and select the JSON format and click CREATE.\n", @@ -83,7 +88,7 @@ "Additionally, AutoGen also supports authentication using `Credentials` objects in Python with the [google-auth library](https://google-auth.readthedocs.io/), which enables even more flexibility.
    \n", "For example, we can even use impersonated credentials.\n", "\n", - "#### Use Service Account Keyfile\n", + "#### Use Service Account Keyfile\n", "\n", "The Google Cloud service account can be specified by setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path to the JSON key file of the service account.
    \n", "\n", @@ -91,7 +96,7 @@ "\n", "#### Use the Google Default Credentials\n", "\n", - "If you are using [Cloud Shell](https://shell.cloud.google.com/cloudshell) or [Cloud Shell editor](https://shell.cloud.google.com/cloudshell/editor) in Google Cloud,
    then you are already authenticated. If you have the Google Cloud SDK installed locally,
    then you can login by running `gcloud auth login` in the command line. \n", + "If you are using [Cloud Shell](https://shell.cloud.google.com/cloudshell) or [Cloud Shell editor](https://shell.cloud.google.com/cloudshell/editor) in Google Cloud,
    then you are already authenticated. If you have the Google Cloud SDK installed locally,
    then you can login by running `gcloud auth application-default login` in the command line. \n", "\n", "Detailed instructions for installing the Google Cloud SDK can be found [here](https://cloud.google.com/sdk/docs/install).\n", "\n", @@ -99,7 +104,7 @@ "\n", "The google-auth library supports a wide range of authentication scenarios, and you can simply pass a previously created `Credentials` object to the `llm_config`.
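As a rough sketch under the assumptions stated here (placeholder key-file path; the resulting object is then supplied to the `llm_config` as described), a `Credentials` object could be obtained with google-auth in either of these ways:

```python
import google.auth
from google.oauth2 import service_account

# Option 1: reuse whatever credentials are currently active,
# e.g. after `gcloud auth application-default login` or inside Cloud Shell
credentials, project_id = google.auth.default()

# Option 2: build credentials explicitly from a service account key file (placeholder path)
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/autogen-with-gemini-key.json"
)
```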
    \n", "The [official documentation](https://google-auth.readthedocs.io/) of the Python package provides a detailed overview of the supported methods and usage examples.
    \n", - "If you are already authenticated, like in [Cloud Shell](https://shell.cloud.google.com/cloudshell), or after running the `gcloud auth login` command in a CLI, then the `google.auth.default()` Python method will automatically return your currently active credentials." + "If you are already authenticated, like in [Cloud Shell](https://shell.cloud.google.com/cloudshell), or after running the `gcloud auth application-default login` command in a CLI, then the `google.auth.default()` Python method will automatically return your currently active credentials." ] }, { @@ -181,7 +186,6 @@ "from autogen import Agent, AssistantAgent, ConversableAgent, UserProxyAgent\n", "from autogen.agentchat.contrib.img_utils import _to_pil, get_image_data\n", "from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent\n", - "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "from autogen.code_utils import DEFAULT_MODEL, UNKNOWN, content_str, execute_code, extract_code, infer_lang" ] @@ -308,13 +312,13 @@ "gcloud auth application-default login\n", "gcloud config set project autogen-with-gemini\n", "```\n", - "The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is a path to our service account JSON keyfile, as described in the [Use Service Account Keyfile](#Use Service Account Keyfile) section above.
    \n", + "The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is a path to our service account JSON keyfile, as described in the [Use Service Account Keyfile](#use_svc_keyfile) section above.
    \n", "We also need to set the Google cloud project, which is `autogen-with-gemini` in this example.

    \n", "\n", - "Note, we could also run `gcloud auth login` in case we wish to use our personal Google account instead of a service account.\n", + "Note, we could also run `gcloud auth application-default login` to use our personal Google account instead of a service account.\n", "In this case we need to run the following commands:\n", "```bash\n", - "gcloud auth login\n", + "gcloud gcloud auth application-default login\n", "gcloud config set project autogen-with-gemini\n", "```" ] @@ -395,7 +399,128 @@ { "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [ + "# Use Gemini via the OpenAI Library in Autogen\n", + "Using Gemini via the OpenAI library is also possible once you are already authenticated.
    \n", + "Run `gcloud auth application-default login` to set up application default credentials locally for the example below.
    \n", + "Also set the Google cloud project on the CLI if you have not done so far:
    \n", + "```bash\n", + "gcloud config set project autogen-with-gemini\n", + "```\n", + "The prerequisites are essentially the same as in the example above.
    \n", + "\n", + "You can read more on the topic in the [official Google docs](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-gemini-using-openai-library).\n", + "
    A list of currently supported models can also be found in the [docs](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-gemini-using-openai-library#supported_models).\n", + "
    \n", + "
    \n", + "Note, that you will need to refresh your token regularly, by default every 1 hour." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import google.auth\n", + "\n", + "scopes = [\"https://www.googleapis.com/auth/cloud-platform\"]\n", + "creds, project = google.auth.default(scopes)\n", + "auth_req = google.auth.transport.requests.Request()\n", + "creds.refresh(auth_req)\n", + "location = \"us-west1\"\n", + "prompt_price_per_1k = (\n", + " 0.000125 # For more up-to-date prices see https://cloud.google.com/vertex-ai/generative-ai/pricing\n", + ")\n", + "completion_token_price_per_1k = (\n", + " 0.000375 # For more up-to-date prices see https://cloud.google.com/vertex-ai/generative-ai/pricing\n", + ")\n", + "\n", + "openai_gemini_config = [\n", + " {\n", + " \"model\": \"google/gemini-1.5-pro-001\",\n", + " \"api_type\": \"openai\",\n", + " \"base_url\": f\"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project}/locations/{location}/endpoints/openapi\",\n", + " \"api_key\": creds.token,\n", + " \"price\": [prompt_price_per_1k, completion_token_price_per_1k],\n", + " }\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser_proxy\u001b[0m (to assistant):\n", + "\n", + "\n", + " Compute the integral of the function f(x)=x^3 on the interval 0 to 10 using a Python script,\n", + " which returns the value of the definite integral.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "\n", + "```python\n", + "# filename: integral.py\n", + "def integrate_x_cubed(a, b):\n", + " \"\"\"\n", + " This function calculates the definite integral of x^3 from a to b.\n", + "\n", + " Args:\n", + " a: The lower limit of integration.\n", + " b: The upper limit of integration.\n", + "\n", + " Returns:\n", + " The value of the definite integral.\n", + " \"\"\"\n", + " return (b**4 - a**4) / 4\n", + "\n", + "# Calculate the integral of x^3 from 0 to 10\n", + "result = integrate_x_cubed(0, 10)\n", + "\n", + "# Print the result\n", + "print(result)\n", + "```\n", + "\n", + "This script defines a function `integrate_x_cubed` that takes the lower and upper limits of integration as arguments and returns the definite integral of x^3 using the power rule of integration. 
The script then calls this function with the limits 0 and 10 and prints the result.\n", + "\n", + "Execute the script `python integral.py`, you should get the result: `2500.0`.\n", + "\n", + "TERMINATE\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "assistant = AssistantAgent(\"assistant\", llm_config={\"config_list\": openai_gemini_config}, max_consecutive_auto_reply=3)\n", + "\n", + "user_proxy = UserProxyAgent(\n", + " \"user_proxy\",\n", + " code_execution_config={\"work_dir\": \"coding\", \"use_docker\": False},\n", + " human_input_mode=\"NEVER\",\n", + " is_termination_msg=lambda x: content_str(x.get(\"content\")).find(\"TERMINATE\") >= 0,\n", + ")\n", + "\n", + "result = user_proxy.initiate_chat(\n", + " assistant,\n", + " message=\"\"\"\n", + " Compute the integral of the function f(x)=x^3 on the interval 0 to 10 using a Python script,\n", + " which returns the value of the definite integral.\"\"\",\n", + ")" + ] } ], "metadata": { diff --git a/website/docs/topics/non-openai-models/transforms-for-nonopenai-models.ipynb b/website/docs/topics/non-openai-models/transforms-for-nonopenai-models.ipynb new file mode 100644 index 00000000000..88f651aa16d --- /dev/null +++ b/website/docs/topics/non-openai-models/transforms-for-nonopenai-models.ipynb @@ -0,0 +1,933 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transform Messages for Non-OpenAI Models\n", + "\n", + "There are a large variety of models available beyond OpenAI's and they all have different capabilities. Smaller context windows and different API's can require tweaks to your workflow in order to work with them.\n", + "\n", + "If you're new to Transform Messages, see the [introduction to Transform Messages](/docs/topics/handling_long_contexts/intro_to_transform_messages).\n", + "\n", + "## Reducing context sizes\n", + "\n", + "Although context windows are increasing, there are still a large number of models that have context windows (e.g. 2K, 4K, or 8K tokens) which may be overwhelmed by your workflow's messages.\n", + "\n", + "To handle longer contexts using transforms, essentially reducing them effectively for smaller context windows, please see the page on [compressing text](/docs/topics/handling_long_contexts/compressing_text_w_llmligua).\n", + "\n", + "## Incorporating an agent's name\n", + "\n", + "Interestingly, the agent's name, such as Jack in the below example, is not included in messages when using non-OpenAI models. This means that there is no way of the name being known by the model during inference, unless we include it in the body of the message text.\n", + "\n", + "```python\n", + "comedian = ConversableAgent(\n", + " name=\"Jack\", # Not included in messages for non-OpenAI inference\n", + " llm_config=phi2,\n", + " system_message=\"Your name is Jack and you are a comedian.\",\n", + ")\n", + "```\n", + "\n", + "When using OpenAI models, the name field is included and examples in the AutoGen documentation may rely on this fact. 
Therefore, it may not be an issue in your existing workflows, however it's important to be aware of and be able to cater for it.\n", + "\n", + "In the simple two-agent chat example, below, we will use a `TextMessageContentName` transform, available from the Transforms module, to add in the name of the agents to the messages.\n", + "\n", + "As we won't be using OpenAI, we will use the Anthropic client to demonstrate.\n", + "\n", + "We'll start by importing our modules and setting our config." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "from autogen import ConversableAgent\n", + "\n", + "config_list_claude = [\n", + " {\n", + " \"model\": \"claude-3-5-sonnet-20240620\",\n", + " \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n", + " \"api_type\": \"anthropic\",\n", + " \"cache_seed\": None, # Ensure we're not caching any results\n", + " }\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we add two agents, both comedians who will make up jokes about the other using their name." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "cathy = ConversableAgent(\n", + " \"Cathy\",\n", + " system_message=\"Your name is Cathy and you are a part of a duo of comedians.\",\n", + " llm_config={\"config_list\": config_list_claude},\n", + " human_input_mode=\"NEVER\",\n", + ")\n", + "\n", + "joe = ConversableAgent(\n", + " \"Joe\",\n", + " system_message=\"Your name is Joe and you are a part of a duo of comedians.\",\n", + " llm_config={\"config_list\": config_list_claude},\n", + " human_input_mode=\"NEVER\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start the chat without using the transform and see how it performs." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mJoe\u001b[0m (to Cathy):\n", + "\n", + "People say I have a funny name, tell me a joke about it.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mCathy\u001b[0m (to Joe):\n", + "\n", + "As Cathy, part of a comedy duo, I'd need to know your actual name to craft a personalized joke about it. Without that information, I can't create a specific joke tailored to your name. However, I can offer a more general joke about names if you'd like. Let me know if you want to share your name or if you'd prefer a different kind of joke!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mJoe\u001b[0m (to Cathy):\n", + "\n", + "I apologize for the confusion. You're right that as Cathy, you wouldn't know my name or background. I made a mistake in my previous response by assuming a context that wasn't provided. Thank you for the professional way you handled that.\n", + "\n", + "As Joe, the other half of your comedy duo, I'll play along with your prompt:\n", + "\n", + "\"Well, Cathy, you know what they say - people in glass houses shouldn't throw stones. 
But with a name like yours, maybe you should consider moving into a stone house! At least then when people say 'Cathy who?' you can say 'Cathy Stone, of course!'\"\n", + "\n", + "How's that for a quick improvised joke about your name, partner?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mCathy\u001b[0m (to Joe):\n", + "\n", + "Oh Joe, you crack me up! That's why we're such a great duo. I love how you turned that around on me. Let me give it a shot:\n", + "\n", + "\"Well, Joe, speaking of stones, I guess with a partner like you, I'm stuck between a rock and a hard place. But hey, at least with you around, I'll never be accused of being the average Joe in this act!\"\n", + "\n", + "How's that for a comeback, partner? I think we're really on a roll here. Should we take this act on the road?\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "result = joe.initiate_chat(cathy, message=\"People say I have a funny name, tell me a joke about it.\", max_turns=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see from the chat, Cathy doesn't know Joe's name at the start.\n", + "\n", + "Now, we'll create a transform that injects the names into the messages and apply that transform to both agents." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Import our transforms\n", + "from autogen.agentchat.contrib.capabilities import transform_messages\n", + "from autogen.agentchat.contrib.capabilities.transforms import TextMessageContentName\n", + "\n", + "# Create a name transform\n", + "# This will inject the agent's name for a message into the start of the message content.\n", + "# E.g. \"'Jack' said\\n...\"\n", + "name_transform = TextMessageContentName(position=\"start\", format_string=\"'{name}' said:\\n\")\n", + "\n", + "# Create the TransformMessages that will be applied.\n", + "# In this case we are only putting in one transform but you could\n", + "# stack the transforms if you also wanted to do others, like\n", + "# compress the text. Transforms are performed sequentially.\n", + "context_handling = transform_messages.TransformMessages(transforms=[name_transform])\n", + "\n", + "# Add it to both agents so when they run inference it will apply to the messages\n", + "context_handling.add_to_agent(cathy)\n", + "context_handling.add_to_agent(joe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try that chat again now that we're injecting the names." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mJoe\u001b[0m (to Cathy):\n", + "\n", + "People say I have a funny name, tell me a joke about it.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m1 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mCathy\u001b[0m (to Joe):\n", + "\n", + "Hey there Joe! I'm Cathy, one half of a comedy duo. You know, having a short name like Joe can be pretty fun to play with. Here's a little joke for you:\n", + "\n", + "Why did Joe's friends call him \"Volcano\"?\n", + "Because he was always erupting with short outbursts!\n", + "\n", + "Ba dumb tss! Okay, maybe not my best work, but I promise our duo's material is much funnier on stage. 
Names can be great comedy fodder - short ones, long ones, unusual ones. The key is finding the right angle. Got any funny stories about your name you'd like to share? Those personal anecdotes often make for the best laughs!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m2 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mJoe\u001b[0m (to Cathy):\n", + "\n", + "Thanks for the setup, Cathy! I like your joke - short and sweet, just like my name. Speaking of my name, here's a little quip I've used before:\n", + "\n", + "You know, I've always felt my parents really phoned it in when naming me. They must've been like, \"We need to name this kid... eh, Joe. Done. What's for dinner?\"\n", + "\n", + "But hey, at least it's easy to spell. Although sometimes I wonder if I should jazz it up a bit. Maybe go by \"Jo-seph\" or \"Joe-tastic.\" What do you think, Cathy? Any suggestions for spicing up a plain old \"Joe\"?\n", + "\n", + "And you're right, personal stories about names can be comedy gold. I once had a telemarketer absolutely butcher my name. They called and asked for \"Joo.\" I told them there's no Joo here, just a Joe. They apologized and asked for \"Hoe\" instead. At that point, I just had to laugh and hang up!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m3 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mCathy\u001b[0m (to Joe):\n", + "\n", + "'Cathy' said:\n", + "Oh Joe, you're a natural! I'm loving your material. That telemarketer story had me in stitches - from Joe to Joo to Hoe, what a wild ride! \n", + "\n", + "As for jazzing up your name, I've got a few suggestions that might tickle your funny bone:\n", + "\n", + "1. \"Joe-normous\" - for when you're feeling larger than life.\n", + "2. \"Joevius Maximus\" - if you're going for that Roman emperor vibe.\n", + "3. \"Joe-pacabra\" - half man, half mysterious creature.\n", + "4. \"Joehemoth\" - for those days when you feel particularly beastly.\n", + "5. \"Average Joe-seidon\" - god of the sea... and mediocrity.\n", + "\n", + "But honestly, Joe, I think you're selling yourself short (pun intended for your short name). Your delivery is spot-on, and you've got a great sense of timing. Have you ever considered doing stand-up? With material like that, you could be the next big thing in comedy. \n", + "\n", + "Just imagine the headline: \"Plain Old Joe Takes Comedy World by Storm!\" Now that's a name that would turn heads, don't you think?\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "result = joe.initiate_chat(cathy, message=\"People say I have a funny name, tell me a joke about it.\", max_turns=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see from this conversation that Cathy uses Joe's name in her first response message, showing that incorporating the name using a transform has enabled the Cathy agent to *recognise* Joe.\n", + "\n", + "Where the transform used above becomes essential is in a Group Chat using the `auto` selection mode (default), when the Group Chat Manager is selecting an agent based on their `name`.\n", + "\n", + "## Transforms in group chats\n", + "\n", + "As noted above, it is important when using non-OpenAI models to inject the agent's name into the messages when you are using `auto` agent selection mode. 
By doing so, you are giving the model the best chance at understanding which agent belongs to each message.\n", + "\n", + "Additionally, group chats can involve a large number of messages and, therefore, tokens. So, to assist with keeping the context used within your model's context window you can use a compression transform.\n", + "\n", + "Below is a group chat example that incorporates these two transforms and relies on the LLM using agent names to determine and select the next agent.\n", + "\n", + "We'll use Anthropic's Claude 3.5 Sonnet config from the previous example as the LLM for all agents and the group chat manager (which selects the next agent to speak).\n", + "\n", + "The scenario in the example is the production of two kid-friendly long-form articles on cloud formations.\n", + "\n", + "Let's start by creating our three team members:\n", + "\n", + "- **Subject_Expert** will select a cloud formation and provide some bullet points about it.\n", + "- **Writer** will write long-form content, about 2,000 words, for the selected cloud formation based on the bullet points.\n", + "- **Scheduler** is responsible for delivering the task to the group chat manager, determining if we need to continue writing more articles, and to terminate the group chat by saying 'TERMINATE'." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "sme_agent = ConversableAgent(\n", + " \"Subject_Expert\",\n", + " system_message=\"You're a subject matter expert on cloud formations and work in a team with a scheduler and a writer. Every time you're asked to speak it's for a new article. You must prepare for a new article by selecting a the cloud formation, providing a summary of that formation and the impact on weather, in bullet points. Make it kid friendly. Aim for a dozen bullet points. Your task is only to provide topics and bullet points on new articles, don't review any previously written articles.\",\n", + " description=\"An expert on cloud formations, great at developing ideas to write about.\",\n", + " llm_config={\"config_list\": config_list_claude},\n", + " human_input_mode=\"NEVER\",\n", + ")\n", + "\n", + "scheduler = ConversableAgent(\n", + " \"Scheduler\",\n", + " system_message=\"You're a marketing expert, managing the production of a specific number of articles. Count the number of articles written and once they have been written say the word 'TERMINATE'.\",\n", + " description=\"A marketing expert that's excellent at managing the production of articles.\",\n", + " llm_config={\"config_list\": config_list_claude},\n", + " human_input_mode=\"NEVER\",\n", + ")\n", + "\n", + "writer = ConversableAgent(\n", + " \"Writer\",\n", + " system_message=\"You're a writer of online news articles on scientific topics, written for an audience of primary school students. 
Aim for 2,000 words for each article.\",\n", + " description=\"An excellent writer, takes given topics and writes long-form articles.\",\n", + " llm_config={\"config_list\": config_list_claude},\n", + " human_input_mode=\"NEVER\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we create our two transforms, one for injecting the `name` and the other to compress the messages if the estimated token count for all messages combined is greater than 1,000 tokens.\n", + "\n", + "As these transforms will be applied to the nested chat in a group chat where the next speaker is selected, we add a filter to the transforms to not apply to `system` messages and to messages from the `checking_agent` who is the agent within the nested chat for selecting the next speaker.\n", + "\n", + "These exclusions are used to minimise any loss of instruction in those messages as they are critical for speaker selection." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua\n", + "from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor, TextMessageContentName\n", + "\n", + "# Create transform to inject name\n", + "# This will inject the agent's name for a message into the start of the message content.\n", + "# E.g. \"'Subject_Expert' said\\n...\"\n", + "name_transform = TextMessageContentName(\n", + " position=\"start\",\n", + " format_string=\"'{name}' said:\\n\",\n", + " filter_dict={\n", + " \"role\": [\"system\"],\n", + " \"name\": [\"checking_agent\"],\n", + " }, # don't add the name for the select speaker-specific nested-chat agents\n", + ")\n", + "\n", + "# Create transform to compress messages\n", + "# If you don't have LLMLingua installed: pip install LLMLingua\n", + "llm_lingua = LLMLingua()\n", + "compress_transform = TextMessageCompressor(\n", + " text_compressor=llm_lingua,\n", + " min_tokens=1000, # Don't compress if total tokens in list of messages is <= 1000\n", + " filter_dict={\n", + " \"role\": [\"system\"],\n", + " \"name\": [\"checking_agent\"],\n", + " }, # don't compress messages specifically for the select speaker prompts\n", + ")\n", + "\n", + "# Create the TransformMessages that will be applied\n", + "# In this case we are only putting in one transform but you could\n", + "# stack the transforms if you also wanted to do others, like\n", + "# compressing the text. Transforms are performed sequentially.\n", + "select_speaker_transforms = transform_messages.TransformMessages(\n", + " transforms=[\n", + " compress_transform,\n", + " name_transform,\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With transforms created, we can apply them to the group chat's select speaker nested chat.\n", + "\n", + "In addition to the application of the transforms to the group chat's `select_speaker_transform_messages` parameter, we are providing explicit instructions on the order of agents within the `select_speaker_message_template`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from autogen import GroupChat, GroupChatManager\n", + "\n", + "group_chat = GroupChat(\n", + " agents=[sme_agent, scheduler, writer],\n", + " messages=[],\n", + " max_round=10,\n", + " select_speaker_message_template=\"\"\"You manage a team that produces and releases articles.\n", + " The roles available in the team are:\n", + " {roles}\n", + " Take the task given and coordinate the production of one or more articles.\n", + " The order for each article should be the Subject_Expert first, then the Writer to write an article, then the Scheduler to review and determine if more are required.\n", + " Finally, you can output the word 'TERMINATE' to signify the end of the task.\"\"\",\n", + " select_speaker_prompt_template=\"Read the above conversation, select the next person from {agentlist} and only return the role.\",\n", + " # Transforms applied to the group chat speaker selection when in 'auto' mode\n", + " select_speaker_transform_messages=select_speaker_transforms,\n", + " select_speaker_auto_verbose=True, # See the selection process\n", + ")\n", + "\n", + "manager = GroupChatManager(\n", + " groupchat=group_chat,\n", + " llm_config={\"config_list\": config_list_claude},\n", + " is_termination_msg=lambda x: \"TERMINATE\" in x.get(\"content\", \"\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will also apply the transforms to each of the agents so that when they are getting the messages to respond to they are compressed and have the names of the agents injected." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# We add the transforms to the team of agents so they understand who has said what and the messages are compressed to save tokens\n", + "select_speaker_transforms.add_to_agent(sme_agent)\n", + "select_speaker_transforms.add_to_agent(scheduler)\n", + "select_speaker_transforms.add_to_agent(writer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Start the chat and show the cost at the end.\n", + "\n", + "Note: `select_speaker_auto_verbose` was set to True on the group chat so you can see the speaker selection process in between each message." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mScheduler\u001b[0m (to chat_manager):\n", + "\n", + "We need a couple of articles on different cloud formations, let's get some help on creating them!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "Read the above conversation, select the next person from ['Subject_Expert', 'Scheduler', 'Writer'] and only return the role.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m1 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Subject_Expert\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: Subject_Expert\u001b[0m\n", + "\u001b[32m\n", + "Next speaker: Subject_Expert\n", + "\u001b[0m\n", + "\u001b[33m1 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mSubject_Expert\u001b[0m (to chat_manager):\n", + "\n", + "Certainly! I'd be happy to provide topics and bullet points for an article on a specific cloud formation. Let's start with one of the most recognizable cloud types: Cumulus clouds.\n", + "\n", + "Cloud Formation: Cumulus Clouds\n", + "\n", + "Summary:\n", + "Cumulus clouds are puffy, white clouds that look like floating cotton balls in the sky. They're often associated with fair weather and are a common sight on sunny days.\n", + "\n", + "Bullet points for a kid-friendly article:\n", + "\n", + "• Cumulus clouds are nicknamed \"fair weather clouds\"\n", + "• They look like fluffy white cotton balls or popcorn in the sky\n", + "• Cumulus clouds usually have flat bottoms and rounded tops\n", + "• They form when warm air rises from the ground and cools as it goes up\n", + "• These clouds are made of tiny water droplets\n", + "• Cumulus clouds typically appear on sunny, pleasant days\n", + "• They often form in the late morning and disappear by evening\n", + "• Sometimes cumulus clouds can grow into big thunderstorms\n", + "• Pilots of small planes often avoid flying through cumulus clouds because they can be bumpy\n", + "• Cumulus clouds can cast fun shadows on the ground\n", + "• They're great for cloud-watching and imagining shapes (like animals or objects)\n", + "• Artists often include cumulus clouds in their paintings of sunny landscapes\n", + "\n", + "This information should provide a good starting point for an engaging, kid-friendly article about cumulus clouds. 
Let me know if you'd like bullet points on another cloud formation for the second article!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "Read the above conversation, select the next person from ['Subject_Expert', 'Scheduler', 'Writer'] and only return the role.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m2 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Writer\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: Writer\u001b[0m\n", + "\u001b[32m\n", + "Next speaker: Writer\n", + "\u001b[0m\n", + "\u001b[33m2 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mWriter\u001b[0m (to chat_manager):\n", + "\n", + "Thank you for the excellent outline on cumulus clouds. I'll use this information to create a 2000-word article for primary school students on cumulus clouds. Here's the article:\n", + "\n", + "Fluffy Friends in the Sky: All About Cumulus Clouds\n", + "\n", + "Have you ever looked up at the sky on a beautiful sunny day and seen big, puffy white clouds that look like giant cotton balls floating high above? Those clouds have a special name – they're called cumulus clouds! In this article, we're going to learn all about these fascinating clouds that paint our skies with their fluffy shapes. So, grab your imagination, and let's go on a cloud adventure!\n", + "\n", + "What Are Cumulus Clouds?\n", + "\n", + "Cumulus clouds are one of the most common and easily recognizable types of clouds in our sky. Their name comes from the Latin word \"cumulus,\" which means \"heap\" or \"pile.\" This makes sense because these clouds look like big heaps of cotton or fluffy piles of popcorn floating in the air.\n", + "\n", + "These clouds are famous for their distinct shape. They have flat bottoms and rounded, puffy tops that often look like they're bubbling up into the sky. Cumulus clouds are usually bright white, especially on the parts that face the sun. Sometimes, the bottom parts of the clouds can look a bit grey, especially if they're very big.\n", + "\n", + "Cumulus clouds are so well-known and loved that they've earned a special nickname. People often call them \"fair weather clouds\" because they're usually seen on nice, sunny days when the weather is pleasant. When you see cumulus clouds in the sky, it usually means it's a great day to play outside!\n", + "\n", + "How Do Cumulus Clouds Form?\n", + "\n", + "Now that we know what cumulus clouds look like, let's explore how these fluffy sky friends come to be. The process of cumulus cloud formation is quite interesting and involves some cool science!\n", + "\n", + "It all starts with the sun warming up the ground. As the ground gets warm, it heats the air right above it. This warm air starts to rise because hot air is lighter than cold air. As the warm air goes up, it begins to cool down. This is because the higher you go in the atmosphere, the colder it gets.\n", + "\n", + "When the rising warm air cools down enough, something magical happens. The water vapor (tiny bits of water that float in the air) in this cooling air starts to condense. Condensation is when water vapor turns back into liquid water. 
This creates tiny water droplets that clump together to form the cloud.\n", + "\n", + "The process doesn't stop there! As long as there's warm air rising from below, the cloud keeps growing bigger and puffier. That's why cumulus clouds often look like they're bubbling or boiling at the top. They're constantly growing and changing shape as more warm air rises and more water droplets form.\n", + "\n", + "Interestingly, even though cumulus clouds look super fluffy and solid, they're actually made up of millions of tiny water droplets floating in the air. If you could touch a cumulus cloud (which, unfortunately, you can't), it would feel more like fog than a fluffy pillow!\n", + "\n", + "When Can We See Cumulus Clouds?\n", + "\n", + "One of the cool things about cumulus clouds is that they follow a bit of a daily schedule. They're like nature's clock in the sky!\n", + "\n", + "Cumulus clouds typically start to form in the late morning. This is when the sun has had enough time to warm up the ground and get those pockets of warm air rising. As the day goes on and gets warmer, you might see more and more cumulus clouds popping up in the sky.\n", + "\n", + "These clouds usually reach their peak in the afternoon when the day is at its warmest. This is the best time for cloud watching! You might see lots of different shapes and sizes of cumulus clouds dotting the blue sky.\n", + "\n", + "As evening approaches and the air starts to cool down, cumulus clouds often begin to disappear. Without the warm rising air to keep them growing, these clouds tend to evaporate and fade away. By nighttime, the sky is often clear again, ready for stars to twinkle.\n", + "\n", + "Remember, though, that weather can be unpredictable. Sometimes cumulus clouds stick around longer, and sometimes they might not form at all. It all depends on the conditions in the atmosphere that day.\n", + "\n", + "Cumulus Clouds and Weather\n", + "\n", + "While cumulus clouds are often called \"fair weather clouds,\" they can actually tell us a lot about what's happening in the atmosphere and what kind of weather we might expect.\n", + "\n", + "On most days, seeing cumulus clouds means the weather is likely to stay nice. These small to medium-sized puffy clouds usually indicate stable air and good weather conditions. They're the kind of clouds you want to see on a picnic day or when you're heading to the beach!\n", + "\n", + "However, cumulus clouds can sometimes grow into much larger clouds called cumulonimbus clouds. These are the big, tall clouds that can bring thunderstorms. If you see cumulus clouds starting to grow very tall and dark at the bottom, it might be a sign that a storm is brewing.\n", + "\n", + "Pilots of small airplanes often try to avoid flying through cumulus clouds. Even though these clouds look soft and fluffy from the ground, they can create bumpy air currents that can make for an uncomfortable flight. Big cumulus clouds can also have strong up and down air movements inside them, which can be challenging for small aircraft to navigate.\n", + "\n", + "Fun with Cumulus Clouds\n", + "\n", + "Cumulus clouds aren't just interesting to learn about – they're also great for having fun! Here are some enjoyable activities you can do with cumulus clouds:\n", + "\n", + "1. Cloud Watching: On a day with lots of cumulus clouds, lie down on the grass and look up at the sky. Let your imagination run wild! What shapes can you see in the clouds? Maybe you'll spot a rabbit, a dragon, or even a sailing ship! 
Cloud watching is a great way to relax and be creative.\n", + "\n", + "2. Cloud Shadows: On sunny days with scattered cumulus clouds, watch how the clouds cast shadows on the ground. These shadows move as the clouds drift across the sky, creating a constantly changing pattern on the earth below.\n", + "\n", + "3. Cloud Photography: If you have a camera or a smartphone, try taking pictures of cumulus clouds. You might capture some really interesting shapes or beautiful scenes of clouds against a blue sky.\n", + "\n", + "4. Cloud Diary: Keep a cloud diary for a week or a month. Each day, look out the window and draw or describe the clouds you see. Over time, you'll start to notice patterns in how the clouds change with the weather.\n", + "\n", + "5. Cloud in a Jar Experiment: With an adult's help, you can even make your own cumulus cloud in a jar! This fun science experiment helps you understand how these clouds form.\n", + "\n", + "Cumulus Clouds in Art and Culture\n", + "\n", + "Cumulus clouds are so beautiful and recognizable that they've become a popular subject in art and culture.\n", + "\n", + "Many famous painters have included cumulus clouds in their landscapes. Artists like Vincent van Gogh and Claude Monet often painted scenes with big, puffy clouds floating over fields or water. These clouds add depth and movement to paintings, making the scenes feel alive and dynamic.\n", + "\n", + "In children's books and cartoons, cumulus clouds are often drawn as simple white puffs. They're used to show that it's a nice day in the story. Sometimes, characters in cartoons even sit or sleep on these fluffy clouds (even though in real life, you'd fall right through a cloud!).\n", + "\n", + "Cumulus clouds have also inspired many writers and poets. Their ever-changing shapes and the way they float peacefully across the sky have been described in countless poems and stories.\n", + "\n", + "In some cultures, cumulus clouds have even played a role in weather folklore. For example, there's an old saying: \"If clouds are puffy like cotton on high, then dry weather's coming by and by.\" This shows how people have long used cumulus clouds as a way to predict the weather.\n", + "\n", + "The Science Behind Cumulus Clouds\n", + "\n", + "While cumulus clouds might look simple, there's actually a lot of complex science happening inside them. Here are some fascinating scientific facts about cumulus clouds:\n", + "\n", + "1. Temperature: The temperature inside a cumulus cloud is usually below freezing, even on a warm day! This is because the cloud is high up in the atmosphere where it's much colder.\n", + "\n", + "2. Weight: Even though cumulus clouds look light and fluffy, they're actually quite heavy. An average cumulus cloud can weigh as much as 100 elephants! But don't worry – they stay up in the sky because the tiny water droplets are spread out over a large area.\n", + "\n", + "3. Movement: Cumulus clouds move with the wind. The speed at which they move can tell us how fast the wind is blowing high up in the atmosphere.\n", + "\n", + "4. Life Span: Most cumulus clouds only last for about 15 to 20 minutes before they evaporate. However, in the right conditions, they can last for hours, constantly reforming as they move across the sky.\n", + "\n", + "5. Size: While most cumulus clouds are fairly small, some can grow to be enormous. The biggest cumulus clouds can be as tall as a 10-story building!\n", + "\n", + "Conclusion\n", + "\n", + "Cumulus clouds are truly amazing parts of our sky. 
From their fluffy, cotton-like appearance to the complex science behind their formation, these clouds have so much to teach us about weather and the atmosphere.\n", + "\n", + "Next time you're outside on a nice day, take a moment to look up at the sky. If you see those familiar puffy white clouds, you'll know you're looking at cumulus clouds. Remember how they form, think about the tiny water droplets that make them up, and let your imagination run wild with the shapes you see.\n", + "\n", + "Cumulus clouds are more than just beautiful sights in the sky. They're a reminder of the constant changes happening in our atmosphere, the amazing processes of nature, and the wonder of the world around us. So keep your eyes on the skies, and enjoy the fluffy, fascinating world of cumulus clouds!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "Read the above conversation, select the next person from ['Subject_Expert', 'Scheduler', 'Writer'] and only return the role.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Token indices sequence length is longer than the specified maximum sequence length for this model (2324 > 512). Running this sequence through the model will result in indexing errors\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m1282 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m3 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Scheduler\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: Scheduler\u001b[0m\n", + "\u001b[32m\n", + "Next speaker: Scheduler\n", + "\u001b[0m\n", + "\u001b[33m1282 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m3 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mScheduler\u001b[0m (to chat_manager):\n", + "\n", + "Thank you for providing that detailed article on cumulus clouds. It's a well-written piece that covers the topic thoroughly for primary school students. 
Since we now have one complete article, I'll count that as the first one written.\n", + "\n", + "Articles written: 1\n", + "\n", + "Please continue with the next article or task.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "Read the above conversation, select the next person from ['Subject_Expert', 'Scheduler', 'Writer'] and only return the role.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m1311 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m4 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Subject_Expert\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: Subject_Expert\u001b[0m\n", + "\u001b[32m\n", + "Next speaker: Subject_Expert\n", + "\u001b[0m\n", + "\u001b[33m1311 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m4 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mSubject_Expert\u001b[0m (to chat_manager):\n", + "\n", + "'Subject_Expert' said:\n", + "Excellent, I'm glad the article on cumulus clouds was well-received. For our next article, let's focus on stratus clouds. Here's an outline with kid-friendly bullet points on stratus clouds:\n", + "\n", + "Cloud Formation: Stratus Clouds\n", + "\n", + "• Stratus clouds are low-lying, gray clouds that often cover the entire sky\n", + "• They look like a big, gray blanket stretched across the sky\n", + "• The word \"stratus\" comes from the Latin word for \"layer\" or \"spread out\"\n", + "• These clouds usually form less than 6,000 feet (1,800 meters) above the ground\n", + "• Stratus clouds often bring light rain or drizzle, but not heavy downpours\n", + "• Sometimes, stratus clouds can touch the ground - we call this fog!\n", + "• They form when a large area of warm air rises slowly and cools down\n", + "• Stratus clouds can make the day look gloomy and overcast\n", + "• They don't have distinct shapes like cumulus clouds do\n", + "• These clouds can stick around for hours or even days\n", + "• Stratus clouds are common in cool, damp climates\n", + "• They often form in the morning and can clear up by afternoon\n", + "• Pilots need to be careful when flying through stratus clouds because they can reduce visibility\n", + "\n", + "This outline provides a good starting point for an engaging article about stratus clouds, suitable for primary school students. 
It covers the basic characteristics, formation, and impact of stratus clouds on weather and daily life.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "Read the above conversation, select the next person from ['Subject_Expert', 'Scheduler', 'Writer'] and only return the role.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m1475 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m5 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Writer\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: Writer\u001b[0m\n", + "\u001b[32m\n", + "Next speaker: Writer\n", + "\u001b[0m\n", + "\u001b[33m1475 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m5 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mWriter\u001b[0m (to chat_manager):\n", + "\n", + "'Writer' said:\n", + "Thank you for the excellent outline on stratus clouds. I'll use this information to create a 2000-word article for primary school students about stratus clouds. Here's the article:\n", + "\n", + "The Gray Blanket in the Sky: All About Stratus Clouds\n", + "\n", + "Have you ever woken up to a gloomy day where the whole sky looks like one big gray blanket? Those aren't just any clouds you're seeing – they're called stratus clouds! Let's embark on a cloudy adventure to learn all about these fascinating sky coverings that can make the world look a bit dreary but play an important role in our weather.\n", + "\n", + "What are Stratus Clouds?\n", + "\n", + "Stratus clouds are low-lying, gray clouds that often cover the entire sky. Their name comes from the Latin word \"stratus,\" which means \"layer\" or \"spread out.\" That's exactly what these clouds do – they spread out across the sky like a huge, fluffy gray blanket!\n", + "\n", + "Unlike their puffy cousins, the cumulus clouds, stratus clouds don't have distinct shapes. You won't see any fun animals or objects in stratus clouds. Instead, they create a uniform, featureless layer that can stretch as far as the eye can see.\n", + "\n", + "Stratus clouds are low clouds, which means they form pretty close to the ground. In fact, they usually hang out less than 6,000 feet (or about 1,800 meters) above the Earth's surface. Sometimes, they can be so low that they touch the ground – when this happens, we call it fog!\n", + "\n", + "How Do Stratus Clouds Form?\n", + "\n", + "The formation of stratus clouds is an interesting process that involves some cool science. Here's how it happens:\n", + "\n", + "1. Warm Air Rises: Just like with other cloud types, it all starts with warm air rising from the Earth's surface.\n", + "\n", + "2. Cooling Process: As this warm air rises, it starts to cool down. Remember, the higher you go in the atmosphere, the colder it gets!\n", + "\n", + "3. Water Vapor Condenses: The cooling air can't hold as much water vapor (invisible water in the air) as warm air can. So, as the air cools, the water vapor starts to condense, turning into tiny water droplets.\n", + "\n", + "4. Cloud Formation: These tiny water droplets clump together to form the cloud. In the case of stratus clouds, this happens in a wide, flat layer.\n", + "\n", + "5. 
Stable Atmosphere: Stratus clouds often form when the atmosphere is stable, meaning there's not much mixing between different layers of air. This allows the cloud to spread out in a smooth, even layer.\n", + "\n", + "Stratus clouds can form in a couple of different ways:\n", + "\n", + "• When a layer of warm air moves over a cooler surface (like when warm air moves over a cool ocean).\n", + "• When snow or rain evaporates as it falls through dry air near the ground, cooling and moistening that air until it becomes saturated and forms a cloud.\n", + "\n", + "Weather Associated with Stratus Clouds\n", + "\n", + "When you see stratus clouds, it's a good idea to grab a jacket or an umbrella! These clouds often bring gloomy, overcast weather. Here's what you can expect when stratus clouds are in the sky:\n", + "\n", + "1. Light Rain or Drizzle: Stratus clouds can produce light rain or drizzle. This isn't the heavy downpour you might see with thunderstorms, but more of a constant, gentle sprinkle.\n", + "\n", + "2. Fog: Remember how we said stratus clouds can touch the ground? When they do, we call it fog. So if you've ever walked through a foggy morning, you've actually been inside a stratus cloud!\n", + "\n", + "3. Cool Temperatures: Because stratus clouds block out the sun, days with these clouds tend to be cooler than clear, sunny days.\n", + "\n", + "4. Low Visibility: The thick layer of stratus clouds can make it hard to see very far, especially if they're low to the ground or if it's foggy.\n", + "\n", + "5. Long-lasting: Unlike some other cloud types that come and go quickly, stratus clouds can stick around for hours or even days, especially in cool, damp climates.\n", + "\n", + "Stratus Clouds Around the World\n", + "\n", + "Stratus clouds are common in many parts of the world, but they're especially frequent in certain areas:\n", + "\n", + "• Coastal Regions: Places near the ocean often see a lot of stratus clouds, especially in the morning. The cool water can cause warm air to cool and form these layered clouds.\n", + "\n", + "• Cool, Damp Climates: Areas with cool, moist weather (like the Pacific Northwest in the United States or parts of the United Kingdom) frequently have stratus cloud cover.\n", + "\n", + "• Arctic and Antarctic Regions: The cold polar regions often have stratus clouds, contributing to their characteristically gray skies.\n", + "\n", + "Stratus Clouds and Daily Life\n", + "\n", + "Stratus clouds might not be as fun to look at as puffy cumulus clouds, but they still have a big impact on our daily lives:\n", + "\n", + "1. Agriculture: Farmers pay attention to stratus clouds because they can bring needed moisture for crops, but too many cloudy days can reduce sunlight for plant growth.\n", + "\n", + "2. Solar Power: Stratus clouds can reduce the effectiveness of solar panels by blocking out sunlight.\n", + "\n", + "3. Aviation: Pilots need to be very careful when flying through stratus clouds because they can greatly reduce visibility.\n", + "\n", + "4. Mood: Some people find that long periods of stratus cloud cover can affect their mood, making them feel a bit gloomy. That's why sunny days feel so good after a long stretch of cloudy weather!\n", + "\n", + "5. 
Photography: While stratus clouds might not make for the most exciting cloud photos, they can create a soft, even light that photographers sometimes prefer for certain types of pictures.\n", + "\n", + "Fun Facts About Stratus Clouds\n", + "\n", + "Let's explore some interesting tidbits about these gray sky blankets:\n", + "\n", + "1. Cloud Seeding: Sometimes, scientists try to make it rain by \"seeding\" stratus clouds. They drop tiny particles into the clouds to help water droplets form and fall as rain.\n", + "\n", + "2. Natural Air Conditioning: Stratus clouds act like nature's air conditioning, keeping the Earth cool by reflecting sunlight back into space.\n", + "\n", + "3. Cloud Forests: In some mountainous tropical areas, stratus clouds constantly cover the forest, creating a unique ecosystem called a \"cloud forest.\"\n", + "\n", + "4. Noctilucent Clouds: The highest clouds in Earth's atmosphere, called noctilucent clouds, are a special type of stratus cloud that forms in the mesosphere, about 50 miles (80 km) above the Earth's surface!\n", + "\n", + "5. Cloud Naming: Stratus clouds are part of the ten basic cloud types identified by Luke Howard in 1803. He's known as the \"father of meteorology\" for his work in classifying clouds.\n", + "\n", + "Stratus Cloud Variations\n", + "\n", + "While stratus clouds are generally uniform layers, there are a few variations:\n", + "\n", + "1. Stratus Nebulosus: This is the classic, featureless gray layer we typically think of as stratus clouds.\n", + "\n", + "2. Stratus Fractus: These are ragged, broken pieces of stratus clouds, often seen during or after rain.\n", + "\n", + "3. Altostratus: These are similar to stratus but form at higher altitudes, creating a thinner, more translucent layer.\n", + "\n", + "4. Nimbostratus: These are thick, dark stratus clouds that produce steady rain or snow.\n", + "\n", + "Activities to Learn About Stratus Clouds\n", + "\n", + "Even though stratus clouds aren't as visually exciting as some other cloud types, there are still fun ways to learn about them:\n", + "\n", + "1. Cloud Diary: Keep a cloud diary for a week or a month. Draw or describe the clouds you see each day, noting when you observe stratus clouds.\n", + "\n", + "2. Make a Cloud in a Jar: With an adult's help, you can create a miniature stratus cloud in a jar using hot water, hairspray, and ice.\n", + "\n", + "3. Fog Observation: On a foggy day, go outside (with an adult) and observe how it feels to be inside a stratus cloud that's touching the ground.\n", + "\n", + "4. Weather Station: Set up a simple weather station at home or school. Track temperature, humidity, and cloud cover, noting how these change when stratus clouds are present.\n", + "\n", + "5. Cloud Art: Create art inspired by stratus clouds. You could use gray paint or pencils to create the effect of a cloudy sky, or try making a collage using different shades of gray paper.\n", + "\n", + "Conclusion\n", + "\n", + "Stratus clouds might not be the most exciting clouds in the sky, but they're an important part of our weather and have a big impact on our daily lives. From creating gloomy, drizzly days to helping cool our planet, these low-lying gray blankets play a crucial role in Earth's atmosphere.\n", + "\n", + "Next time you wake up to a gray, overcast day, remember that you're looking at stratus clouds. Think about how they formed, what kind of weather they might bring, and how they're affecting the world around you. 
Even on a cloudy day, there's always something interesting to learn about the sky above us!\n", + "\n", + "So keep your eyes on the skies, young meteorologists, and enjoy exploring the fascinating world of stratus clouds!\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "Read the above conversation, select the next person from ['Subject_Expert', 'Scheduler', 'Writer'] and only return the role.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m2485 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m6 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Please continue.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m>>>>>>>> Select speaker attempt #1 failed as it did not include any agent names.\u001b[0m\n", + "\u001b[33mchecking_agent\u001b[0m (to speaker_selection_agent):\n", + "\n", + "You didn't choose a speaker. As a reminder, to determine the speaker use these prioritised rules:\n", + " 1. If the context refers to themselves as a speaker e.g. \"As the...\" , choose that speaker's name\n", + " 2. If it refers to the \"next\" speaker name, choose that name\n", + " 3. Otherwise, choose the first provided speaker's name in the context\n", + " The names are case-sensitive and should not be abbreviated or changed.\n", + " The only names that are accepted are ['Subject_Expert', 'Scheduler', 'Writer'].\n", + " Respond with ONLY the name of the speaker and DO NOT provide a reason.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33m2487 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m7 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mspeaker_selection_agent\u001b[0m (to checking_agent):\n", + "\n", + "Scheduler\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32m>>>>>>>> Select speaker attempt 2 of 3 successfully selected: Scheduler\u001b[0m\n", + "\u001b[32m\n", + "Next speaker: Scheduler\n", + "\u001b[0m\n", + "\u001b[33m2485 tokens saved with text compression.\u001b[0m\n", + "\u001b[33m6 message(s) changed to incorporate name.\u001b[0m\n", + "\u001b[33mScheduler\u001b[0m (to chat_manager):\n", + "\n", + "'Scheduler' said:\n", + "Excellent work! You've now completed two well-written articles: one on cumulus clouds and another on stratus clouds. 
Both are informative and engaging for primary school students.\n", + "\n", + "Articles written: 2\n", + "\n", + "Since we've reached our target number of articles, I'll now say: TERMINATE\n", + "\n", + "--------------------------------------------------------------------------------\n", + "The cost of the chat was:\n", + "{'usage_including_cached_inference': {'total_cost': 0.013266, 'claude-3-5-sonnet-20240620': {'cost': 0.013266, 'prompt_tokens': 3732, 'completion_tokens': 138, 'total_tokens': 3870}}, 'usage_excluding_cached_inference': {'total_cost': 0.013266, 'claude-3-5-sonnet-20240620': {'cost': 0.013266, 'prompt_tokens': 3732, 'completion_tokens': 138, 'total_tokens': 3870}}}\n" + ] + } + ], + "source": [ + "chat_result = scheduler.initiate_chat(\n", + " recipient=manager,\n", + " message=\"We need a couple of articles on different cloud formations, let's get some help on creating them!\",\n", + ")\n", + "\n", + "print(f\"The cost of the chat was:\\n{chat_result.cost}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There's a lot to digest in the above output, so let's break it down.\n", + "\n", + "1. It successfully completed the task, getting two articles written (Yay!)\n", + "2. The sequence of agents was selected correctly: Scheduler to Subject_Expert to Writer, back to Scheduler, then Subject_Expert to Writer, and finally Scheduler, who terminates the chat.\n", + "3. The transform messages show how many messages were changed to incorporate the speaker's name and how many tokens were saved by text compression.\n", + "4. The LLM occasionally proposes \"Please continue.\" as the next agent name because these continuation messages are interwoven with the messages sent to Anthropic's API. The select speaker retries handle this well, but further prompt tuning could help eliminate these anomalies.\n", + "\n", + "Additional notes:\n", + "\n", + "- Would this have worked without the transforms? Taking out the transforms resulted in a run producing the following incorrect sequence: Scheduler to Subject_Expert to Writer, back to Writer, and then to Scheduler.\n", + "- Tweaking the system messages and descriptions for the agents, and for the group chat's select speaker nested chat, also played a large role in steering the LLM to the correct output. A combination of prompt engineering and the transforms may be required to achieve consistent results.\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autogen", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/website/docs/topics/retrieval_augmentation.md b/website/docs/topics/retrieval_augmentation.md index 366893cb982..3c428f16486 100644 --- a/website/docs/topics/retrieval_augmentation.md +++ b/website/docs/topics/retrieval_augmentation.md @@ -2,16 +2,20 @@ Retrieval Augmented Generation (RAG) is a powerful technique that combines language models with external knowledge retrieval to improve the quality and relevance of generated responses. -One way to realize RAG in AutoGen is to construct agent chats with `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` classes. +One way to realize RAG in AutoGen is to construct agent chats with `AssistantAgent` and `RetrieveUserProxyAgent` classes. 
## Example Setup: RAG with Retrieval Augmented Agents The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: -### Step 1. Create an instance of `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. +### Step 1. Create an instance of `AssistantAgent` and `RetrieveUserProxyAgent`. Here `RetrieveUserProxyAgent` instance acts as a proxy agent that retrieves relevant information based on the user's input. + +Refer to the [doc](https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent) +for more information on the detailed configurations. + ```python -assistant = RetrieveAssistantAgent( +assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ @@ -56,14 +60,14 @@ ragproxyagent.initiate_chat( ## Example Setup: RAG with Retrieval Augmented Agents with PGVector The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: -### Step 1. Create an instance of `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. +### Step 1. Create an instance of `AssistantAgent` and `RetrieveUserProxyAgent`. Here `RetrieveUserProxyAgent` instance acts as a proxy agent that retrieves relevant information based on the user's input. Specify the connection_string, or the host, port, database, username, and password in the db_config. ```python -assistant = RetrieveAssistantAgent( +assistant = AssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ @@ -121,6 +125,7 @@ For more detailed examples and notebooks showcasing the usage of retrieval augme - Automated Code Generation and Question Answering with Retrieval Augmented Agents - [View Notebook](/docs/notebooks/agentchat_RetrieveChat) - Automated Code Generation and Question Answering with [PGVector](https://github.com/pgvector/pgvector) based Retrieval Augmented Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_pgvector.ipynb) - Automated Code Generation and Question Answering with [Qdrant](https://qdrant.tech/) based Retrieval Augmented Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_qdrant.ipynb) +- Automated Code Generation and Question Answering with [MongoDB Atlas](https://www.mongodb.com/) based Retrieval Augmented Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat_mongodb.ipynb) - Chat with OpenAI Assistant with Retrieval Augmentation - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_oai_assistant_retrieval.ipynb) - **RAG**: Group Chat with Retrieval Augmented Generation (with 5 group member agents and 1 manager agent) - [View Notebook](/docs/notebooks/agentchat_groupchat_RAG) diff --git a/website/docs/tutorial/human-in-the-loop.ipynb b/website/docs/tutorial/human-in-the-loop.ipynb index afcdeeaf42b..8bf0aab16d0 100644 --- a/website/docs/tutorial/human-in-the-loop.ipynb +++ b/website/docs/tutorial/human-in-the-loop.ipynb @@ -41,7 +41,7 @@ " termination based on `max_consecutive_auto_reply` is ignored.\n", "\n", "The previous chapters already showed many examples of the cases when `human_input_mode` is `NEVER`. \n", - "Below we show one such example again and then show the differences when this mode is set to `ALWAYS` and `NEVER` instead." 
+ "Below we show one such example again and then show the differences when this mode is set to `ALWAYS` and `TERMINATE` instead." ] }, { diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index f0c0f84a394..8319c54c61f 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -187,7 +187,7 @@ module.exports = { ], }, ], - copyright: `Copyright © ${new Date().getFullYear()} AutoGen Authors | Privacy and Cookies`, + copyright: `Copyright © ${new Date().getFullYear()} AutoGen Authors | Privacy and Cookies | Consumer Health Privacy`, }, announcementBar: { id: "whats_new", diff --git a/website/src/data/gallery.json b/website/src/data/gallery.json index 10ed9f6866d..02e9e5cdd66 100644 --- a/website/src/data/gallery.json +++ b/website/src/data/gallery.json @@ -228,5 +228,12 @@ "tags": [ "tools", "ui", "app" ] + }, + { + "title": "Expense Tracker - using Autogen", + "link": "https://github.com/Kirushikesh/Personal-Finance-Agent", + "description": "Tracks personal finance income and expenses then helps the user to analyse it using AutoGen agents.", + "image": "default.png", + "tags": ["tools", "app"] } ]