-
Notifications
You must be signed in to change notification settings - Fork 3
/
ex_3_R.R
51 lines (40 loc) · 1.63 KB
/
ex_3_R.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Dimension (lookup) tables used to generate the sales fact table.

# Sales locations: state/province code, display name and country.
state_table <- data.frame(
  key = c("CA", "NY", "WA", "ON", "QU"),
  name = c("California", "New York", "Washington", "Ontario", "Quebec"),
  country = c("USA", "USA", "USA", "Canada", "Canada")
)

# Calendar months: month number, abbreviated English name and quarter.
# month.abb is the base R constant c("Jan", ..., "Dec").
month_table <- data.frame(
  key = 1:12,
  desc = month.abb,
  quarter = rep(c("Q1", "Q2", "Q3", "Q4"), each = 3)
)

# Products on sale and their unit price.
prod_table <- data.frame(
  key = c("Printer", "Tablet", "Laptop"),
  price = c(225, 570, 1120)
)
# Generate a random sales fact table.
#
# Reads the lookup tables state_table, month_table and prod_table from the
# enclosing environment.
#
# Args:
#   no_of_recs: number of transaction records to generate.
#
# Returns:
#   A data frame with one row per transaction and the columns month, year,
#   loc, prod, unit and amount, sorted by year and then month, with row
#   names reset to 1..n.
gen_sales <- function(no_of_recs) {
  # Draw every attribute independently; `prob` holds relative weights
  # (they do not need to sum to one).
  loc <- sample(state_table$key, no_of_recs,
                replace = TRUE, prob = c(2, 2, 1, 1, 1))
  time_month <- sample(month_table$key, no_of_recs, replace = TRUE)
  time_year <- sample(c(2012, 2013), no_of_recs, replace = TRUE)
  prod <- sample(prod_table$key, no_of_recs,
                 replace = TRUE, prob = c(1, 3, 2))
  unit <- sample(c(1, 2), no_of_recs, replace = TRUE, prob = c(10, 3))

  # Look the unit price up by product key. Row-indexing the table with the
  # key itself (prod_table[prod, ]) is wrong: character keys do not match
  # the default row names (giving NA), and factor keys index by their
  # alphabetical level codes instead of by row position.
  unit_price <- prod_table$price[match(prod, prod_table$key)]
  amount <- unit * unit_price

  sales <- data.frame(
    month = time_month,
    year = time_year,
    loc = loc,
    prod = prod,
    unit = unit,
    amount = amount
  )

  # Sort the records in time order and renumber the rows.
  sales <- sales[order(sales$year, sales$month), ]
  row.names(sales) <- NULL
  sales
}
# Build the fact table: 500 records, sorted by year and month.
sales_fact <- gen_sales(no_of_recs = 500)

# Compact overview of the column types and the first few values.
str(object = sales_fact)

# First 6 rows of the sales_fact table.
head(x = sales_fact, n = 6L)

# Last 6 rows of the sales_fact table.
tail(x = sales_fact, n = 6L)

# Per-column summary statistics of the data frame.
print(summary(object = sales_fact))