From 792e834909e7835cbac8f135fe44c8380efdc89f Mon Sep 17 00:00:00 2001
From: Jing Peng <114365503+jingpeng7527@users.noreply.github.com>
Date: Mon, 29 Jan 2024 20:21:21 -0800
Subject: [PATCH] jp - complete all the sql tasks

---
 sql/task1.sql             | 43 +++++++++++++++++++++++++--
 sql/task2.sql             | 61 ++++++++++++++++++++++++++++++++++++++-
 sql/task3.sql             | 57 ++++++++++++++++++++++++++++++++++++
 tests/test_sql_queries.py | 18 ++++++------
 4 files changed, 166 insertions(+), 13 deletions(-)

diff --git a/sql/task1.sql b/sql/task1.sql
index 90de336ca..4871931c6 100644
--- a/sql/task1.sql
+++ b/sql/task1.sql
@@ -1,14 +1,51 @@
 -- Problem 1: Retrieve all products in the Sports category
 -- Write an SQL query to retrieve all products in a specific category.
 
+-- p.* limits the result to product columns (no duplicated category columns).
+SELECT p.*
+FROM `Products` p
+    JOIN `Categories` c ON p.category_id = c.category_id
+WHERE c.category_name = 'Sports & Outdoors';
+
 -- Problem 2: Retrieve the total number of orders for each user
 -- Write an SQL query to retrieve the total number of orders for each user.
 -- The result should include the user ID, username, and the total number of orders.
 
+-- Start from Users so that users with no orders are reported with a count of 0.
+SELECT
+    u.user_id,
+    u.username,
+    COUNT(o.order_id) AS total_orders  -- ignores NULLs from the LEFT JOIN
+FROM `Users` u
+    LEFT JOIN `Orders` o ON o.user_id = u.user_id
+GROUP BY u.user_id, u.username;
+
 -- Problem 3: Retrieve the average rating for each product
 -- Write an SQL query to retrieve the average rating for each product.
 -- The result should include the product ID, product name, and the average rating.
 
--- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders
--- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders.
--- The result should include the user ID, username, and the total amount spent.
+-- Columns are qualified: a bare product_id in GROUP BY is ambiguous here.
+SELECT
+    p.product_id, p.product_name, AVG(r.rating) AS average_rating
+FROM `Products` p
+    -- LEFT JOIN keeps products without reviews (average_rating is NULL).
+    LEFT JOIN `Reviews` r ON r.product_id = p.product_id
+GROUP BY p.product_id, p.product_name;
+
+ -- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders
+ -- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders.
+ -- The result should include the user ID, username, and the total amount spent.
+
+SELECT
+    u.user_id,
+    u.username,
+    SUM(o.total_amount) AS total_amount_spent
+FROM `Users` u
+    -- Inner join: only users with at least one order can rank by spend, and
+    -- no NULL user row is produced for orders lacking a matching user.
+    JOIN `Orders` o ON o.user_id = u.user_id
+GROUP BY
+    u.user_id, u.username
+ORDER BY
+    total_amount_spent DESC, u.user_id  -- user_id makes ties deterministic
+LIMIT 5;
diff --git a/sql/task2.sql b/sql/task2.sql
index ad2596731..0f9430071 100644
--- a/sql/task2.sql
+++ b/sql/task2.sql
@@ -3,17 +3,76 @@
 -- The result should include the product ID, product name, and the average rating.
 -- Hint: You may need to use subqueries or common table expressions (CTEs) to solve this problem.
 
+-- Compare against the best average instead of LIMIT 1 so ties are all returned.
+WITH product_ratings AS (
+    SELECT p.product_id, p.product_name, AVG(r.rating) AS average_rating
+    FROM `Products` p
+        JOIN `Reviews` r ON r.product_id = p.product_id
+    GROUP BY p.product_id, p.product_name
+)
+SELECT product_id, product_name, average_rating
+FROM product_ratings
+WHERE average_rating = (
+    SELECT MAX(average_rating) FROM product_ratings
+);
+
 -- Problem 6: Retrieve the users who have made at least one order in each category
 -- Write an SQL query to retrieve the users who have made at least one order in each category.
 -- The result should include the user ID and username.
 -- Hint: You may need to use subqueries or joins to solve this problem.
+SELECT u.user_id, u.username
+FROM `Users` u
+WHERE
+    u.user_id IN (
+        SELECT o.user_id
+        FROM
+            `Orders` o
+            JOIN `Order_Items` item ON o.order_id = item.order_id
+            JOIN `Products` p ON item.product_id = p.product_id
+        -- Group per user only: also grouping by category_id would make the
+        -- distinct-category count 1 for every group.
+        GROUP BY o.user_id
+        HAVING
+            COUNT(DISTINCT p.category_id) = (
+                SELECT COUNT(*) FROM `Categories`
+            )
+    );
+
 
 -- Problem 7: Retrieve the products that have not received any reviews
 -- Write an SQL query to retrieve the products that have not received any reviews.
 -- The result should include the product ID and product name.
 -- Hint: You may need to use subqueries or left joins to solve this problem.
 
+SELECT p.product_id, p.product_name
+FROM `Products` p
+-- NOT EXISTS stays correct even if Reviews.product_id contains NULLs,
+-- which would make a NOT IN comparison return no rows.
+WHERE NOT EXISTS (
+    SELECT 1 FROM `Reviews` r WHERE r.product_id = p.product_id
+);
+
 -- Problem 8: Retrieve the users who have made consecutive orders on consecutive days
 -- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days.
 -- The result should include the user ID and username.
--- Hint: You may need to use subqueries or window functions to solve this problem.
\ No newline at end of file
+-- Hint: You may need to use subqueries or window functions to solve this problem.
+
+SELECT u.user_id, u.username
+FROM `Users` u
+WHERE
+    u.user_id IN (
+        SELECT o.user_id
+        FROM (
+            SELECT
+                user_id,
+                order_date,
+                -- Previous order date of the same user (NULL for the first order).
+                LAG(order_date, 1) OVER (
+                    PARTITION BY user_id
+                    ORDER BY order_date
+                ) AS prev_order_date
+            FROM `Orders`
+        ) AS o
+        -- A NULL prev_order_date yields NULL here and is filtered out.
+        WHERE DATEDIFF(o.order_date, o.prev_order_date) = 1
+    );
\ No newline at end of file
diff --git a/sql/task3.sql b/sql/task3.sql
index f078a9439..f1152c0f0 100644
--- a/sql/task3.sql
+++ b/sql/task3.sql
@@ -2,18 +2,75 @@
 -- Write an SQL query to retrieve the top 3 categories with the highest total sales amount.
 -- The result should include the category ID, category name, and the total sales amount.
 -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem.
+SELECT c.category_id, c.category_name, SUM(o.total_amount) AS total_amount
+FROM `Categories` c
+    INNER JOIN `Products` p ON c.category_id = p.category_id
+    INNER JOIN `Order_Items` oi ON oi.product_id = p.product_id
+    INNER JOIN `Orders` o ON oi.order_id = o.order_id
+-- NOTE(review): o.total_amount is counted once per joined order item; confirm
+-- this is the intended definition of a category's sales.
+GROUP BY c.category_id, c.category_name
+ORDER BY total_amount DESC
+LIMIT 3;
 
 -- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games
 -- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games
 -- The result should include the user ID and username.
 -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem.
 
+SELECT u.user_id, u.username
+FROM
+    `Users` u
+    JOIN `Orders` o ON u.user_id = o.user_id
+    JOIN `Order_Items` oi ON o.order_id = oi.order_id
+    JOIN `Products` p ON oi.product_id = p.product_id
+    JOIN (
+        SELECT category_id
+        FROM `Categories`
+        WHERE category_name = 'Toys & Games'
+    ) toys ON p.category_id = toys.category_id
+GROUP BY
+    u.user_id,
+    u.username,
+    toys.category_id
+HAVING
+    -- The user's distinct Toys & Games products must cover the whole category.
+    -- Aliases are spelled exactly as declared (p, toys): MySQL treats table
+    -- aliases as case-sensitive on Unix.
+    COUNT(DISTINCT p.product_id) = (
+        SELECT COUNT(*)
+        FROM `Products` tp WHERE tp.category_id = toys.category_id
+    );
+
 -- Problem 11: Retrieve the products that have the highest price within each category
 -- Write an SQL query to retrieve the products that have the highest price within each category.
 -- The result should include the product ID, product name, category ID, and price.
 -- Hint: You may need to use subqueries, joins, and window functions to solve this problem.
+SELECT p.product_id, p.product_name, p.category_id, p.price
+FROM `Products` p
+    JOIN (
+        -- Highest price per category; ties are all returned by the join.
+        SELECT category_id, MAX(price) AS max_price
+        FROM `Products`
+        GROUP BY category_id
+    ) p_max ON p.category_id = p_max.category_id
+        AND p.price = p_max.max_price;
 
 -- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days
 -- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days.
 -- The result should include the user ID and username.
 -- Hint: You may need to use subqueries, joins, and window functions to solve this problem.
+
+SELECT u.user_id, u.username
+FROM
+    `Users` u
+    JOIN `Orders` o1 ON u.user_id = o1.user_id
+    JOIN `Orders` o2 ON u.user_id = o2.user_id
+        AND o2.order_date = DATE_ADD(o1.order_date, INTERVAL 1 DAY)
+    JOIN `Orders` o3 ON u.user_id = o3.user_id
+        AND o3.order_date = DATE_ADD(o1.order_date, INTERVAL 2 DAY)
+-- The joins already require orders on three consecutive days, so a single
+-- matching o1 row qualifies the user; a HAVING on COUNT(DISTINCT o1.order_id)
+-- would demand several separate streak starts. GROUP BY only de-duplicates.
+GROUP BY
+    u.user_id, u.username;
\ No newline at end of file
diff --git a/tests/test_sql_queries.py b/tests/test_sql_queries.py
index 22b25d546..71bd7b716 100644
--- a/tests/test_sql_queries.py
+++ b/tests/test_sql_queries.py
@@ -1,16 +1,18 @@
+import os
 import unittest
-import psycopg2  # Replace with appropriate database connector based on your database
+
+import mysql.connector  # Replace with appropriate database connector based on your database
 
 class TestSQLQueries(unittest.TestCase):
 
     def setUp(self):
         # Establish a connection to your test database
-        self.conn = psycopg2.connect(
-            dbname='your_dbname',
-            user='your_username',
-            password='your_password',
-            host='your_host',
-            port='your_port'
+        self.conn = mysql.connector.connect(
+            database=os.environ.get('MYSQL_DATABASE', 'shopify'),
+            user=os.environ.get('MYSQL_USER', 'root'),
+            password=os.environ.get('MYSQL_PASSWORD', ''),
+            host=os.environ.get('MYSQL_HOST', 'localhost'),
+            port=int(os.environ.get('MYSQL_PORT', '3306'))
         )
         self.cur = self.conn.cursor()
 
@@ -21,7 +23,7 @@ def tearDown(self):
 
     def test_task1(self):
         # Task 1: Example SQL query in task1.sql
-        with open('/sql/task1.sql', 'r') as file:
+        with open('./sql/task1.sql', 'r') as file:
             sql_query = file.read()
 
         self.cur.execute(sql_query)
@@ -36,7 +38,7 @@ def test_task1(self):
 
     def test_task2(self):
         # Task 2: Example SQL query in task2.sql
-        with open('/sql/task2.sql', 'r') as file:
+        with open('./sql/task2.sql', 'r') as file:
             sql_query = file.read()
 
         self.cur.execute(sql_query)