GoogleCloudPlatform · zorrofox · Dec 25, 2024 · Dec 25, 2024 · Dec 25, 2024 · Dec 26, 2024
@@ -555,6 +555,7 @@ alloydb
 antiword
 apikey
 apikeys
+apk
 appspot
 appuser
 apredict
@@ -948,6 +949,7 @@ newaxis
 newaxisngram
 nfcorpus
 nfl
+nginx
 ngram
 ngrams
 nlp

@@ -0,0 +1,20 @@
+FROM nginx:alpine
+
+# Install Python 3, pip, and supervisor (the process manager started by CMD).
+# NOTE(review): these apk packages are pinned to exact revisions while the
+# base image tag floats; when nginx:alpine moves to a newer Alpine release the
+# pinned revisions may no longer exist. Consider pinning the base tag as well.
+RUN apk add --no-cache \
+    py3-pip=24.0-r2 \
+    python3=3.12.8-r1 \
+    supervisor=4.2.5-r5
+
+# Install the backend's Python dependencies before copying the sources, so
+# this layer stays cached until requirements.txt itself changes.
+COPY backend/requirements.txt /app/requirements.txt
+RUN pip3 install --no-cache-dir --break-system-packages -r /app/requirements.txt
+
+# Copy the frontend into the directory nginx.conf uses as its document root
+COPY frontend/. /usr/share/nginx/html
+
+# Copy the backend
+COPY backend/. /app
+
+# Process-manager and web-server configuration
+COPY supervisord.conf /etc/supervisor/supervisord.conf
+COPY nginx.conf /etc/nginx/nginx.conf
+
+# Port nginx listens on (see nginx.conf)
+EXPOSE 8000
+
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
@@ -165,3 +165,55 @@ You can set up this app locally or via Cloud Shell.
     - Text input: You can write a text prompt to send to the model by entering your message in the box and pressing the send arrow. The model will then respond via audio (turn up your volume!).
     - Voice input: Press the pink microphone button and start speaking. The model will respond via audio. If you would like to mute your microphone, press the button with a slash through the microphone.
     - Video input: The model will also capture your camera input and send it to Gemini. You can ask questions about current or previous video footage. For more details on how this works, visit the [documentation page for the Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live).
+
+### Setup in Cloud Run
+
+1. Clone the repository and cd into the correct directory
+
+    ```sh
+    git clone https://github.com/GoogleCloudPlatform/generative-ai.git
+    cd generative-ai/gemini/multimodal-live-api/websocket-demo-app
+    ```
+
+1. Modify the frontend code to point the WebSocket endpoint to the same container:
+
+    - In the `script.js` file, find line 9, `const PROXY_URL = "wss://[THE_URL_YOU_COPIED_WITHOUT_HTTP]";`, and replace the `PROXY_URL` value with `/ws`. It should look like: `const PROXY_URL = "/ws";`. Note that `/ws` is a relative path, not a `ws://` or `wss://` URL: there is no scheme or host part because the frontend and the backend are served from the same container, so the WebSocket connection goes to the same address that serves the page.
+    - Right below on line 10, update `PROJECT_ID` with your Google Cloud project ID.
+    - Save the changes you've made to `script.js`
+
+1. Deploy the code to Cloud Run using the following `gcloud` command:
+
+    ```sh
+    gcloud run deploy --project=YOUR-PROJECT-ID \
+    --region=us-central1 \
+    --source=./ \
+    --allow-unauthenticated \
+    --port=8000  \
+    gemini-live-demo
+    ```
+
+1. If the deployment succeeds, the command from the previous step outputs a link to the deployed service. Copy the link into your browser and navigate to the demo app UI.
+
+1. Get your Google Cloud access token: Run the following command in a terminal with gcloud installed to set your project, and to retrieve your access token.
+
+    ```sh
+    gcloud components update
+    gcloud components install beta
+    gcloud config set project YOUR-PROJECT-ID
+    gcloud auth print-access-token
+    ```
+
+1. Copy the access token from the previous step into the UI that you have open in your browser.
+
+1. Enter the model ID in the UI:
+   Replace `YOUR-PROJECT-ID` in the input with your Google Cloud Project ID.
+
+1. Connect and interact with the demo:
+
+    - After entering your Access Token and Model ID, press the connect button to connect your web app. Now you should be able to interact with Gemini 2.0 with the Multimodal Live API.
+
+1. To interact with the app, you can do the following:
+
+    - Text input: You can write a text prompt to send to the model by entering your message in the box and pressing the send arrow. The model will then respond via audio (turn up your volume!).
+    - Voice input: Press the microphone button and start speaking. The model will respond via audio. If you would like to mute your microphone, press the button with a slash through the microphone.
+    - Video input: The model will also capture your camera input and send it to Gemini. You can ask questions about current or previous video footage. For more details on how this works, visit the [documentation page for the Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live).
@@ -0,0 +1,43 @@
+# Single-container setup: nginx serves the static frontend on port 8000 and
+# forwards requests under /ws to 127.0.0.1:8080.
+worker_processes auto;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    include mime.types;
+    default_type application/octet-stream;
+
+    sendfile on;
+    keepalive_timeout 65;
+
+    server {
+        listen 8000;
+        server_name localhost;
+
+        # Server-side errors are answered with the static 50x page.
+        error_page 500 502 503 504 /50x.html;
+
+        location = /50x.html {
+            root /usr/share/nginx/html;
+        }
+
+        # Static frontend; paths that match no file or directory fall back
+        # to index.html.
+        location / {
+            root /usr/share/nginx/html;
+            index index.html index.htm;
+            try_files $uri $uri/ /index.html;
+        }
+
+        # WebSocket proxy.
+        location /ws {
+            proxy_pass http://127.0.0.1:8080;
+
+            # The WebSocket handshake needs HTTP/1.1 and explicit
+            # Upgrade/Connection headers, which are not forwarded by default.
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+
+            # Pass the original host and client information upstream.
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+
+            # Allow connections to stay idle for up to an hour before nginx
+            # closes them.
+            proxy_read_timeout 3600s;
+            proxy_send_timeout 3600s;
+        }
+    }
+}
@@ -0,0 +1,9 @@
+; Runs nginx and the Python backend side by side in one container.
+[supervisord]
+; Stay in the foreground so supervisord remains the container's main process.
+nodaemon=true
+
+[program:nginx]
+command=nginx -g "daemon off;"
+; Send process output to the container's stdout/stderr instead of
+; supervisord-managed log files. maxbytes=0 turns off log rotation, which
+; cannot be applied to these device files.
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+
+[program:websocket]
+; Use the python3 command to start the backend.
+command=python3 /app/main.py
+directory=/app
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
-Original file line number
+Diff line change
@@ Expand Up / @@ -555,6 +555,7 @@ alloydb @@
     antiword
     apikey
     apikeys
+    apk
     appspot
     appuser
     apredict
@@ Expand Down Expand Up / @@ -948,6 +949,7 @@ newaxis @@
     newaxisngram
     nfcorpus
     nfl
+    nginx
     ngram
     ngrams
     nlp
@@ Expand Down @@