-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpg_create_import.sql
executable file
·234 lines (196 loc) · 10.1 KB
/
pg_create_import.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
-- PostgreSQL Hands-On
-- NICAR 2019 Newport Beach, Calif.
-- Table creation and data import statements
-- Notes:
-- * You must have PostgreSQL installed and the PostGIS extension enabled on
-- your database. Visit those projects' websites for installation instructions.
-- * You must update the file path in the COPY statements to match the location
-- on your computer where you have placed the data files.
-- * Note the command-line shapefile import step at the bottom of this file.
-- CENSUS DATA TABLES AND Import
-- Census American Community Survey stats: acs_2011_2015_stats
CREATE TABLE acs_2011_2015_stats (
geoid varchar(14) CONSTRAINT geoid_key PRIMARY KEY,
county varchar(50) NOT NULL,
st varchar(20) NOT NULL,
pct_travel_60_min numeric(5,3) NOT NULL,
pct_bachelors_higher numeric(5,3) NOT NULL,
pct_masters_higher numeric(5,3) NOT NULL,
median_hh_income integer,
CHECK (pct_masters_higher <= pct_bachelors_higher)
);
COPY acs_2011_2015_stats
FROM 'C:\Users\Your_Name\Desktop\postgresql-intro-nicar19\data\acs_2011_2015_stats.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');
-- Decennial U.S. Census counties 2000: us_counties_2000
CREATE TABLE us_counties_2000 (
geo_name varchar(90), -- County/state name,
state_us_abbreviation varchar(2), -- State/U.S. abbreviation
state_fips varchar(2), -- State FIPS code
county_fips varchar(3), -- County code
p0010001 integer, -- Total population
p0010002 integer, -- Population of one race:
p0010003 integer, -- White Alone
p0010004 integer, -- Black or African American alone
p0010005 integer, -- American Indian and Alaska Native alone
p0010006 integer, -- Asian alone
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone
p0010008 integer, -- Some Other Race alone
p0010009 integer, -- Population of two or more races
p0010010 integer, -- Population of two races
p0020002 integer, -- Hispanic or Latino
p0020003 integer -- Not Hispanic or Latino:
);
COPY us_counties_2000
FROM 'C:\Users\Your_Name\Desktop\postgresql-intro-nicar19\data\us_counties_2000.csv'
WITH (FORMAT CSV, HEADER);
-- Decennial U.S. Census counties: us_counties_2010
CREATE TABLE us_counties_2010 (
geo_name varchar(90), -- Name of the geography
state_us_abbreviation varchar(2), -- State/U.S. abbreviation
summary_level varchar(3), -- Summary Level
region smallint, -- Region
division smallint, -- Division
state_fips varchar(2), -- State FIPS code
county_fips varchar(3), -- County code
area_land bigint, -- Area (Land) in square meters
area_water bigint, -- Area (Water) in square meters
population_count_100_percent integer, -- Population count (100%)
housing_unit_count_100_percent integer, -- Housing Unit count (100%)
internal_point_lat numeric(10,7), -- Internal point (latitude)
internal_point_lon numeric(10,7), -- Internal point (longitude)
-- This section is referred to as P1. Race:
p0010001 integer, -- Total population
p0010002 integer, -- Population of one race:
p0010003 integer, -- White Alone
p0010004 integer, -- Black or African American alone
p0010005 integer, -- American Indian and Alaska Native alone
p0010006 integer, -- Asian alone
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone
p0010008 integer, -- Some Other Race alone
p0010009 integer, -- Population of two or more races
p0010010 integer, -- Population of two races:
p0010011 integer, -- White; Black or African American
p0010012 integer, -- White; American Indian and Alaska Native
p0010013 integer, -- White; Asian
p0010014 integer, -- White; Native Hawaiian and Other Pacific Islander
p0010015 integer, -- White; Some Other Race
p0010016 integer, -- Black or African American; American Indian and Alaska Native
p0010017 integer, -- Black or African American; Asian
p0010018 integer, -- Black or African American; Native Hawaiian and Other Pacific Islander
p0010019 integer, -- Black or African American; Some Other Race
p0010020 integer, -- American Indian and Alaska Native; Asian
p0010021 integer, -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
p0010022 integer, -- American Indian and Alaska Native; Some Other Race
p0010023 integer, -- Asian; Native Hawaiian and Other Pacific Islander
p0010024 integer, -- Asian; Some Other Race
p0010025 integer, -- Native Hawaiian and Other Pacific Islander; Some Other Race
p0010026 integer, -- Population of three races
p0010047 integer, -- Population of four races
p0010063 integer, -- Population of five races
p0010070 integer, -- Population of six races
-- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
p0020001 integer, -- Total
p0020002 integer, -- Hispanic or Latino
p0020003 integer, -- Not Hispanic or Latino:
p0020004 integer, -- Population of one race:
p0020005 integer, -- White Alone
p0020006 integer, -- Black or African American alone
p0020007 integer, -- American Indian and Alaska Native alone
p0020008 integer, -- Asian alone
p0020009 integer, -- Native Hawaiian and Other Pacific Islander alone
p0020010 integer, -- Some Other Race alone
p0020011 integer, -- Two or More Races
p0020012 integer, -- Population of two races
p0020028 integer, -- Population of three races
p0020049 integer, -- Population of four races
p0020065 integer, -- Population of five races
p0020072 integer, -- Population of six races
-- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER
p0030001 integer, -- Total
p0030002 integer, -- Population of one race:
p0030003 integer, -- White alone
p0030004 integer, -- Black or African American alone
p0030005 integer, -- American Indian and Alaska Native alone
p0030006 integer, -- Asian alone
p0030007 integer, -- Native Hawaiian and Other Pacific Islander alone
p0030008 integer, -- Some Other Race alone
p0030009 integer, -- Two or More Races
p0030010 integer, -- Population of two races
p0030026 integer, -- Population of three races
p0030047 integer, -- Population of four races
p0030063 integer, -- Population of five races
p0030070 integer, -- Population of six races
-- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
-- FOR THE POPULATION 18 YEARS AND OVER
p0040001 integer, -- Total
p0040002 integer, -- Hispanic or Latino
p0040003 integer, -- Not Hispanic or Latino:
p0040004 integer, -- Population of one race:
p0040005 integer, -- White alone
p0040006 integer, -- Black or African American alone
p0040007 integer, -- American Indian and Alaska Native alone
p0040008 integer, -- Asian alone
p0040009 integer, -- Native Hawaiian and Other Pacific Islander alone
p0040010 integer, -- Some Other Race alone
p0040011 integer, -- Two or More Races
p0040012 integer, -- Population of two races
p0040028 integer, -- Population of three races
p0040049 integer, -- Population of four races
p0040065 integer, -- Population of five races
p0040072 integer, -- Population of six races
-- This section is referred to as H1. OCCUPANCY STATUS
h0010001 integer, -- Total housing units
h0010002 integer, -- Occupied
h0010003 integer -- Vacant
);
-- Import
COPY us_counties_2010
FROM 'C:\Users\Your_Name\Desktop\postgresql-intro-nicar19\data\us_counties_2010.csv'
WITH (FORMAT CSV, HEADER);
-- FULL-TEXT SEARCH
-- State of the Union addresses: president_speeches
CREATE TABLE president_speeches (
sotu_id serial PRIMARY KEY,
president varchar(100) NOT NULL,
title varchar(250) NOT NULL,
speech_date date NOT NULL,
speech_text text NOT NULL,
search_speech_text tsvector
);
COPY president_speeches (president, title, speech_date, speech_text)
FROM 'C:\Users\Your_Name\Desktop\postgresql-intro-nicar19\data\sotu-1946-1977.csv'
WITH (FORMAT CSV, DELIMITER '|', HEADER OFF, QUOTE '@');
UPDATE president_speeches
SET search_speech_text = to_tsvector('english', speech_text);
CREATE INDEX search_idx ON president_speeches USING gin(search_speech_text);
-- GIS DATA
CREATE EXTENSION postgis;
-- U.S. farmers markets: farmers_markets
CREATE TABLE farmers_markets (
fmid bigint PRIMARY KEY,
market_name varchar(100) NOT NULL,
street varchar(180),
city varchar(60),
county varchar(25),
st varchar(20) NOT NULL,
zip varchar(10),
longitude numeric(10,7),
latitude numeric(10,7),
organic varchar(1) NOT NULL
);
COPY farmers_markets
FROM 'C:\Users\Your_Name\Desktop\postgresql-intro-nicar19\data\farmers_markets.csv'
WITH (FORMAT CSV, HEADER);
ALTER TABLE farmers_markets ADD COLUMN geog_point geography(POINT,4326);
UPDATE farmers_markets
SET geog_point = ST_SetSRID(
ST_MakePoint(longitude,latitude),4326
)::geography;
CREATE INDEX market_pts_idx ON farmers_markets USING GIST (geog_point);
-- Final step: To run some of the spatial queries, you must import
-- a shapefile via the command line: us_counties_2010_shp
-- The command below can be run at the command line to do this. You will need
-- to have set up your environment to recognize PostgreSQL command line tools
-- at your system path.
-- shp2pgsql -I -s 4269 -W Latin1 tl_2010_us_county10.shp us_counties_2010_shp | psql -d nicar_2019 -U postgres