-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVine_Review_Analysis.sql
91 lines (70 loc) · 2.22 KB
/
Vine_Review_Analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
-- Create review_id_table
CREATE TABLE review_id_table (
review_id TEXT PRIMARY KEY NOT NULL,
customer_id INTEGER,
product_id TEXT,
product_parent INTEGER,
review_date DATE -- this should be in the formate yyyy-mm-dd
);
-- Create products_table. This table will contain only unique values
CREATE TABLE products_table (
product_id TEXT PRIMARY KEY NOT NULL UNIQUE,
product_title TEXT
);
-- Create customer table for first data set
CREATE TABLE customers_table (
customer_id INT PRIMARY KEY NOT NULL UNIQUE,
customer_count INT
);
-- Create vine table
CREATE TABLE vine_table (
review_id TEXT PRIMARY KEY,
star_rating INTEGER,
helpful_votes INTEGER,
total_votes INTEGER,
vine TEXT,
verified_purchase TEXT
);
-- Select values in tables to show
SELECT * FROM customers_table;
SELECT * FROM products_table;
SELECT * FROM review_id_table;
SELECT * FROM vine_table;
-- Create a new vine table. Contains helpful values only.
-- (total count >= 20 and helpful votes >= 50%)
SELECT * INTO vine_table_new
FROM vine_table
WHERE total_votes >= 20 AND
CAST(helpful_votes AS FLOAT)/CAST(total_votes AS FLOAT) >= 0.5;
-- Read vine_table_new
SELECT * FROM vine_table_new;
-- Count total number of reviews
SELECT COUNT(*) FROM vine_table_new; -- 40565
-- Calculate total number of vine reviews
SELECT COUNT(*) FROM vine_table_new
WHERE vine = 'Y'; --94
-- Calculate total number of non-vine reviews
SELECT COUNT(*) FROM vine_table_new
WHERE vine = 'N'; --40471
-- Calculate total number of five star vine reviews
SELECT COUNT(*) FROM vine_table_new
WHERE vine = 'Y' AND star_rating = 5; --48
-- Calculate total number of five star non-vine reviews
SELECT COUNT(*) FROM vine_table_new
WHERE vine = 'N' AND star_rating = 5; --15663
-- Calculate percentage of five star vine reviews
SELECT 100 * CAST((SELECT COUNT(*)
FROM vine_table_new vtn
WHERE vtn.vine = 'Y' AND
vtn.star_rating = 5) AS FLOAT)
/ CAST(COUNT(*) AS FLOAT)
FROM vine_table_new vtn
WHERE vtn.vine = 'Y';
-- Calculate percentage of five star non-vine reviews
SELECT 100 * CAST((SELECT COUNT(*)
FROM vine_table_new vtn
WHERE vtn.vine = 'N' AND
vtn.star_rating = 5) AS FLOAT)
/ CAST(COUNT(*) AS FLOAT)
FROM vine_table_new vtn
WHERE vtn.vine = 'N';