From ea628dfff1ee3527b7636f171790286f84f77303 Mon Sep 17 00:00:00 2001
From: Rayyan
Date: Mon, 29 Jan 2024 21:01:38 -0700
Subject: [PATCH] Final

---
 sql/task1.sql | 57 +++++++++++++++++++++++++++++++
 sql/task2.sql | 83 ++++++++++++++++++++++++++++++++++++++++++++-
 sql/task3.sql | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/sql/task1.sql b/sql/task1.sql
index 90de336ca..d1346dd5a 100644
--- a/sql/task1.sql
+++ b/sql/task1.sql
@@ -1,14 +1,71 @@
 -- Problem 1: Retrieve all products in the Sports category
 -- Write an SQL query to retrieve all products in a specific category.
 
+-- Join the products table with the categories table to match each product with its respective category.
+-- Keep only rows whose category name is 'Sports & Outdoors', the name this query uses for the Sports category.
+select
+    p.product_id,
+    p.product_name,
+    p.description,
+    p.price
+from
+    products p
+join
+    categories c on p.category_id = c.category_id
+where
+    c.category_name = 'Sports & Outdoors';
+
+
 -- Problem 2: Retrieve the total number of orders for each user
 -- Write an SQL query to retrieve the total number of orders for each user.
 -- The result should include the user ID, username, and the total number of orders.
 
+-- LEFT JOIN from 'users' to 'orders' so that users who have never placed an order are still listed;
+-- COUNT(orders.order_id) skips the NULLs the outer join produces for them, so their total_orders is 0.
+select
+    users.user_id, users.username, count(orders.order_id) as total_orders
+from
+    users
+left join
+    orders on orders.user_id = users.user_id
+group by
+    users.user_id, users.username;
+
 -- Problem 3: Retrieve the average rating for each product
 -- Write an SQL query to retrieve the average rating for each product.
 -- The result should include the product ID, product name, and the average rating.
 
+-- The query joins the 'reviews' and 'products' tables to correlate each review with its product,
+-- then averages the ratings per product; because of the inner join, products with no reviews are not listed.
+
+select
+    reviews.product_id,
+    products.product_name,
+    avg(reviews.rating) as average_rating
+from
+    reviews
+inner join
+    products on reviews.product_id = products.product_id
+group by
+    reviews.product_id, products.product_name;
+
 -- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders
 -- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders.
 -- The result should include the user ID, username, and the total amount spent.
+
+-- Join 'orders' to 'users' to pick up usernames, then total each user's spending with SUM(total_amount).
+-- The GROUP BY is required: without it SUM collapses every order into a single row (or the statement is
+-- rejected when MySQL's ONLY_FULL_GROUP_BY mode is on) instead of producing one total per user.
+-- Rows are sorted by total spent, highest first, and cut off after the top 5 users.
+select
+    orders.user_id,
+    users.username,
+    sum(orders.total_amount) as total_spent
+from
+    orders
+inner join
+    users on orders.user_id = users.user_id
+group by
+    orders.user_id, users.username
+order by total_spent desc
+limit 5;
\ No newline at end of file
diff --git a/sql/task2.sql b/sql/task2.sql
index ad2596731..9fd28ce60 100644
--- a/sql/task2.sql
+++ b/sql/task2.sql
@@ -3,17 +3,98 @@
 -- The result should include the product ID, product name, and the average rating.
 -- Hint: You may need to use subqueries or common table expressions (CTEs) to solve this problem.
 
+-- This query calculates the average rating for each product using a CTE,
+-- then selects the product(s) that have the highest average rating across all products.
+-- Reads from 'products' and 'reviews' (the tables the other queries use); products tied for the top average all appear.
+with AverageRatings as (
+    select
+        pd.product_id,
+        pd.product_name,
+        avg(r.rating) as avg_rating
+    from
+        products pd
+    join
+        reviews r on pd.product_id = r.product_id
+    group by
+        pd.product_id, pd.product_name
+)
+
+select
+    product_id,
+    product_name,
+    avg_rating
+from
+    AverageRatings
+where
+    avg_rating = (select max(avg_rating) from AverageRatings);
+
+
+
 -- Problem 6: Retrieve the users who have made at least one order in each category
 -- Write an SQL query to retrieve the users who have made at least one order in each category.
 -- The result should include the user ID and username.
 -- Hint: You may need to use subqueries or joins to solve this problem.
 
+-- This query joins users, orders, order_items and products to trace the categories of products ordered by each user.
+-- It keeps a user only when the number of distinct categories they have ordered from equals the number of rows in
+-- the categories table, i.e. when the user has an order history covering every product category.
+
+select
+    u.user_id,
+    u.username
+from
+    users u
+join
+    orders od on u.user_id = od.user_id
+join
+    order_items oi on od.order_id = oi.order_id
+join
+    products pd on oi.product_id = pd.product_id
+group by
+    u.user_id,
+    u.username
+having
+    count(distinct pd.category_id) = (select count(*) from categories);
+
+
 -- Problem 7: Retrieve the products that have not received any reviews
 -- Write an SQL query to retrieve the products that have not received any reviews.
 -- The result should include the product ID and product name.
 -- Hint: You may need to use subqueries or left joins to solve this problem.
 
+-- Utilizes a LEFT JOIN to combine products with their reviews and identifies those lacking reviews.
+select
+    products.product_id,
+    products.product_name
+from
+    products
+left join
+    reviews on products.product_id = reviews.product_id
+where
+    reviews.product_id is null;
+
+
 -- Problem 8: Retrieve the users who have made consecutive orders on consecutive days
 -- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days.
 -- The result should include the user ID and username.
--- Hint: You may need to use subqueries or window functions to solve this problem.
\ No newline at end of file
+-- Hint: You may need to use subqueries or window functions to solve this problem.
+
+-- The CTE uses LEAD to compute, for every order, the number of days until the same user's next order;
+-- a difference of 1 marks orders on consecutive days. DISTINCT lists each such user once, however many pairs match.
+with OrderedOrders as (
+    select
+        user_id,
+        order_date,
+        datediff(lead(order_date) over (partition by user_id order by order_date), order_date) as diff
+    from
+        orders
+)
+select distinct
+    o.user_id,
+    u.username
+from
+    OrderedOrders o
+join
+    users u on o.user_id = u.user_id
+where
+    o.diff = 1;
diff --git a/sql/task3.sql b/sql/task3.sql
index f078a9439..c81fba9f6 100644
--- a/sql/task3.sql
+++ b/sql/task3.sql
@@ -3,17 +3,111 @@
 -- The result should include the category ID, category name, and the total sales amount.
 -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem.
 
+-- The query calculates the total sales amount for each category by aggregating sales data
+-- from the joined tables: categories, products, order_items, and orders.
+-- It multiplies the quantity of each product sold by its unit price in the order_items table,
+-- sums up these amounts per category, and then identifies the top 3 categories based on this total sales value.
+
+select
+    cat.category_id,
+    cat.category_name,
+    sum(oi.quantity * oi.unit_price) as total_sales_amount
+from
+    categories as cat
+join
+    products as prod on cat.category_id = prod.category_id
+join
+    order_items as oi on prod.product_id = oi.product_id
+join
+    orders as ord on oi.order_id = ord.order_id
+group by
+    cat.category_id, cat.category_name
+order by
+    total_sales_amount desc
+limit 3;
+
 -- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games
 -- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games
 -- The result should include the user ID and username.
 -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem.
 
+-- The query works in stages using CTEs: CategoryProducts lists every product in the 'Toys & Games' category
+-- (matched on category_name instead of a hard-coded id; assumes the name is stored exactly as in the task text),
+-- UserOrders pairs users with the category products they ordered, and EligibleUsers keeps users who ordered them all.
+with CategoryProducts as (
+    select product_id
+    from products
+    where category_id in (select category_id from categories where category_name = 'Toys & Games')
+),
+UserOrders as (
+    select od.user_id, oi.product_id
+    from order_items oi
+    join orders od on oi.order_id = od.order_id
+    join CategoryProducts cp on oi.product_id = cp.product_id
+),
+EligibleUsers as (
+    select user_id
+    from UserOrders
+    group by user_id
+    having count(distinct product_id) = (select count(*) from CategoryProducts)
+)
+select eu.user_id, u.username
+from EligibleUsers eu
+join users u on eu.user_id = u.user_id;
+
 -- Problem 11: Retrieve the products that have the highest price within each category
 -- Write an SQL query to retrieve the products that have the highest price within each category.
 -- The result should include the product ID, product name, category ID, and price.
 -- Hint: You may need to use subqueries, joins, and window functions to solve this problem.
 
+-- The query operates in two main stages: first it calculates the highest price in each category using a CTE
+-- and then joins this CTE with the products table to find products that match the highest price in the category.
+
+with MaxPrices as (
+    select
+        category_id,
+        max(price) as max_price
+    from
+        products
+    group by
+        category_id
+)
+select
+    p.product_id,
+    p.product_name,
+    p.category_id,
+    p.price
+from
+    products p
+inner join
+    MaxPrices mp on p.category_id = mp.category_id and p.price = mp.max_price
+order by
+    p.category_id;
+
 -- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days
 -- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days.
 -- The result should include the user ID and username.
 -- Hint: You may need to use subqueries, joins, and window functions to solve this problem.
+
+-- This query identifies users who have placed orders on three consecutive calendar days.
+-- The innermost derived table reduces 'orders' to one row per user per calendar day, so several orders on the
+-- same day (or a time-of-day part in order_date) cannot hide a run; LEAD then fetches the next two order days.
+-- The WHERE clause keeps rows whose next two order days are exactly +1 and +2 days; DISTINCT collapses repeats.
+
+select distinct u.user_id, u.username
+from (
+    select
+        d.user_id,
+        d.order_day,
+        lead(d.order_day, 1) over (partition by d.user_id order by d.order_day) as next_order_day,
+        lead(d.order_day, 2) over (partition by d.user_id order by d.order_day) as next_next_order_day
+    from
+        (select distinct user_id, date(order_date) as order_day from orders) as d
+) as sub_query
+join
+    users u on sub_query.user_id = u.user_id
+where
+    date_add(sub_query.order_day, interval 1 day) = sub_query.next_order_day
+and
+    date_add(sub_query.order_day, interval 2 day) = sub_query.next_next_order_day;
+