-
Notifications
You must be signed in to change notification settings - Fork 0
/
Analysis.sql
156 lines (120 loc) · 4.81 KB
/
Analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
USE Rental;
-- Select distinct provinces and neighbourhood
SELECT DISTINCT(Provinces) FROM Provinces;
SELECT DISTINCT(Neighbourhood) FROM Neighbourhood;
SELECT YEAR, ROUND(AVG(_1_Bedroom),2) AS '1 BHK' , ROUND(AVG(_2_Bedroom),2) AS '2 BHK' FROM [Rental Data]
WHERE _1_Bedroom> 0
GROUP BY Year;
-- Average rents province wise
SELECT p.Provinces, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1 BHK' ,ROUND(AVG(r._2_Bedroom),2) AS '2 BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
WHERE r._1_Bedroom > 0
GROUP BY r.YEAR, p.Provinces;
-- Average rents Centres wise and getting data for particular Centre using CTE
WITH Centres_data AS
(
SELECT p.Provinces, c.centres, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1 BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
RIGHT JOIN Centres c ON r.Centre_id = c.Centre_id
WHERE r._1_Bedroom > 0
GROUP BY p.Provinces,c.Centres,r.Year
)
SELECT * FROM Centres_data
WHERE Centres = 'Toronto'
ORDER BY Year;
-- Average rents province wise, Neighborhood wise and eliminating data where there are no values
SELECT p.Provinces, n.neighbourhood, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1 BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
RIGHT JOIN Neighbourhood n ON r.Neighbourhood_id = n.Neighbourhood_id
WHERE (r._1_Bedroom > 0 AND p.Provinces LIKE 'Ont.') -- Use wildcard to get neighbourhood trend
GROUP BY r.YEAR, p.Provinces,n.Neighbourhood;
--HAVING ROUND(AVG(r._1_Bedroom),2) > 0 ;
-- Same Results using cte
WITH Provincial_data AS (
SELECT p.Provinces, n.neighbourhood, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1 BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
RIGHT JOIN Neighbourhood n ON r.Neighbourhood_id = n.Neighbourhood_id
WHERE (r._1_Bedroom > 0 AND p.Provinces LIKE 'Ont.') -- Use wildcard to get neighbourhood trend
GROUP BY r.YEAR, p.Provinces,n.Neighbourhood
)
SELECT * FROM Provincial_data;
-- Province with highest rent
SELECT Provinces
FROM Provinces
WHERE province_id IN (
SELECT TOP 1 province_id
FROM [Rental Data]
GROUP BY province_id
ORDER BY ROUND(AVG(_1_Bedroom),2) DESC
);
WITH AvgRentByProvince AS (
SELECT province_id, ROUND(AVG(_1_Bedroom),2) AS avg_rent
FROM [Rental Data]
GROUP BY province_id
),
MaxAvgRent AS (
SELECT MAX(avg_rent) AS max_avg_rent
FROM AvgRentByProvince
),
MinAvgRent AS (
SELECT MIN(avg_rent) AS min_avg_rent
FROM AvgRentByProvince
)
SELECT *
FROM AvgRentByProvince arp
WHERE arp.avg_rent IN (SELECT max_avg_rent FROM MaxAvgRent)
OR arp.avg_rent IN (SELECT min_avg_rent FROM MinAvgRent);
-- We can create view to get insights for the Provinces based on the years
CREATE VIEW RentalSummary AS
SELECT r.Rent_ID, r._1_Bedroom, r.province_id, r.neighbourhood_id, r.YEAR, p.Provinces
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id;
SELECT * FROM RentalSummary;
CREATE VIEW Provincial_data AS (
SELECT p.Provinces, c.centres, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1_BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
RIGHT JOIN Centres c ON r.Centre_id = c.Centre_id
WHERE r._1_Bedroom > 0
GROUP BY p.Provinces,c.Centres,r.Year);
SELECT * FROM Provincial_data;
-- Getting data for the provinces for all years top 5
SELECT TOP 5 Provinces,Year , ROUND(AVG([1_BHK]),2) AS rents FROM Provincial_data
GROUP BY Provinces,Year
ORDER BY YEAR DESC, ROUND(AVG([1_BHK]),2) DESC;
-- filter based on the year top 5
SELECT TOP 5 Provinces ,ROUND(AVG([1_BHK]),2) AS rents FROM Provincial_data
WHERE YEAR = '2018'
GROUP BY Provinces
ORDER BY ROUND(AVG([1_BHK]),2) DESC;
CREATE VIEW Centres_data AS
(SELECT p.Provinces, c.centres, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1_BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
RIGHT JOIN Centres c ON r.Centre_id = c.Centre_id
WHERE r._1_Bedroom > 0
GROUP BY p.Provinces,c.Centres,r.Year)
SELECT * FROM Centres_data;
-- WE can use this to select data based on the Provinces and Year
SELECT TOP 3 Centres ,ROUND(AVG([1_BHK]),2) AS rents FROM Centres_data
WHERE YEAR = '2018' AND Provinces = 'B.C.'
GROUP BY Centres
ORDER BY ROUND(AVG([1_BHK]),2) DESC;
-- Include 2 bhk as well
CREATE VIEW Centres_data_2 AS
(SELECT p.Provinces, c.centres, r.YEAR, ROUND(AVG(r._1_Bedroom),2) AS '1_BHK' ,
ROUND(AVG(r._2_Bedroom),2) AS '2_BHK'
FROM [Rental Data] r
INNER JOIN Provinces p ON r.province_id = p.province_id
RIGHT JOIN Centres c ON r.Centre_id = c.Centre_id
WHERE r._1_Bedroom > 0
GROUP BY p.Provinces,c.Centres,r.Year)
SELECT * FROM Centres_data_2;
SELECT TOP 3 Centres , ROUND(AVG([1_BHK]),2) AS rents_1, ROUND(AVG([2_BHK]),2) AS rents_2 FROM Centres_data_2
WHERE YEAR = '2022' AND Provinces = 'ONT.'
GROUP BY Centres
ORDER BY ROUND(AVG([1_BHK]),2) DESC;