Python & GeoComputation
Longzhu Shen Spatial Ecology Jun 2019
Code avalability at
wget https://github.com/selvaje/spatial-ecology-codes/blob/master/docs/source/PYTHON/02_Geo_Python.ipynb
Configuration
Adjust the ‘ast_note_interactivity’ setting to print all versus last expression in Jupyter cell.
[90]:
#from IPython.core.interactiveshell import InteractiveShell
#InteractiveShell.ast_node_interactivity = "last_expr" #"all"
Numpy Intro
Numpy is a foundational package for data analysis in Python. It enables extremely efficient mathematical operations on large data sets. It also can be used to store and manipulate tabular data.
Creating N-dimensional arrays using NumPy
There are many ways to create N-dimensional arrays.
Create 2X3 array initialized to all zeroes & check the dimensions of the array
[1]:
import numpy as np
[2]:
a = np.zeros((2,3))
a
[2]:
array([[0., 0., 0.],
[0., 0., 0.]])
[3]:
a.shape
[3]:
(2, 3)
Create array using arange function
[4]:
a = np.arange(6)
a
[4]:
array([0, 1, 2, 3, 4, 5])
Create array initialized by list of lists
[5]:
a = np.array([[0,1,2],[3,4,5]])
a
[5]:
array([[0, 1, 2],
[3, 4, 5]])
Get values from N-dimensional array
[6]:
a[0,0]
[6]:
0
slicing
[7]:
a[0,:]
[7]:
array([0, 1, 2])
[8]:
a[:,-1]
[8]:
array([2, 5])
[9]:
a[0:2,1:3]
[9]:
array([[1, 2],
[4, 5]])
Modifying N-dimensional arrays
[10]:
a
[10]:
array([[0, 1, 2],
[3, 4, 5]])
[11]:
a[0,0] = 25.0
a
[11]:
array([[25, 1, 2],
[ 3, 4, 5]])
[12]:
a[0,:] = np.array([10,11,12])
a
[12]:
array([[10, 11, 12],
[ 3, 4, 5]])
[13]:
a[:,-1] = np.array([20,21])
a
[13]:
array([[10, 11, 20],
[ 3, 4, 21]])
[13]:
a[0:2,1:3] = np.array([[10,11],[20,21]])
a
[13]:
array([[10, 10, 11],
[ 3, 20, 21]])
Broadcasting
“Describes how numpy treats arrays with different shapes during arithmetic operations”
Subject to constraints, smaller array is ‘broadcast’ across larger array so they have compatible shapes
Looping occurs in C instead of Python - fast!
More info here.
[14]:
a[0,:] = 10.0
a
[14]:
array([[10, 10, 10],
[ 3, 4, 21]])
[15]:
a[:,:] = np.array([30,31,32])
a
[15]:
array([[30, 31, 32],
[30, 31, 32]])
[16]:
tmp = np.array([40,41]).reshape(2,1)
tmp
[16]:
array([[40],
[41]])
[17]:
a[:,:] = tmp
a
[17]:
array([[40, 40, 40],
[41, 41, 41]])
[18]:
a[0:2,1:3] = 100.0
a
[18]:
array([[ 40, 100, 100],
[ 41, 100, 100]])
Math Operations on Arrays
Arithmetic of arrays vs. scalars applies scalar to all elements of array
Arithmetic of array vs. array operates elementwise
[19]:
a = np.arange(4)
a
[19]:
array([0, 1, 2, 3])
[20]:
a + 0.5
[20]:
array([0.5, 1.5, 2.5, 3.5])
[21]:
a + a
[21]:
array([0, 2, 4, 6])
[41]:
a * a
[41]:
array([0, 1, 4, 9])
[22]:
np.dot(a, a)
[22]:
14
[23]:
np.dot(a.reshape(4,1), a.reshape(1,4))
[23]:
array([[0, 0, 0, 0],
[0, 1, 2, 3],
[0, 2, 4, 6],
[0, 3, 6, 9]])
Working with Tabular Data
[24]:
solar_system_file = 'solar_system_abbr.csv'
solar_system_data = np.genfromtxt(solar_system_file, delimiter=',', skip_header=1,
dtype=['S10', 'S20', 'i4', 'f4' , 'f4'])
[34]:
solar_system_data
[34]:
array([(b'Sun', b'Star', 0, 1.3920e+06, 3.33e+05),
(b'Mercury', b'Terrestrial planet', 1, 4.8780e+03, 5.50e-02),
(b'Venus', b'Terrestrial planet', 2, 1.2104e+04, 8.15e-01),
(b'Earth', b'Terrestrial planet', 3, 1.2756e+04, 1.00e+00),
(b'Mars', b'Terrestrial planet', 4, 6.7870e+03, 1.07e-01),
(b'Jupiter', b'Gas giant', 6, 1.4280e+05, 3.18e+02),
(b'Saturn', b'Gas giant', 7, 1.2000e+05, 9.50e+01),
(b'Uranus', b'Gas giant', 8, 5.1118e+04, 1.50e+01),
(b'Neptune', b'Gas giant', 9, 4.9528e+04, 1.70e+01),
(b'Ceres', b'Dwarf planet', 5, 9.7460e+02, 1.60e-04),
(b'Pluto', b'Dwarf planet', 10, 2.3000e+03, 2.00e-03),
(b'Haumea', b'Dwarf planet', 11, 1.3000e+03, 7.00e-04),
(b'Makemake', b'Dwarf planet', 12, 1.4200e+03, 6.70e-04),
(b'Eris', b'Dwarf planet', 13, 2.3260e+03, 2.80e-03)],
dtype=[('f0', 'S10'), ('f1', 'S20'), ('f2', '<i4'), ('f3', '<f4'), ('f4', '<f4')])
[35]:
solar_system_data.dtype.names = ('planet', 'type', 'order', 'diameter', 'mass')
solar_system_data
[35]:
array([(b'Sun', b'Star', 0, 1.3920e+06, 3.33e+05),
(b'Mercury', b'Terrestrial planet', 1, 4.8780e+03, 5.50e-02),
(b'Venus', b'Terrestrial planet', 2, 1.2104e+04, 8.15e-01),
(b'Earth', b'Terrestrial planet', 3, 1.2756e+04, 1.00e+00),
(b'Mars', b'Terrestrial planet', 4, 6.7870e+03, 1.07e-01),
(b'Jupiter', b'Gas giant', 6, 1.4280e+05, 3.18e+02),
(b'Saturn', b'Gas giant', 7, 1.2000e+05, 9.50e+01),
(b'Uranus', b'Gas giant', 8, 5.1118e+04, 1.50e+01),
(b'Neptune', b'Gas giant', 9, 4.9528e+04, 1.70e+01),
(b'Ceres', b'Dwarf planet', 5, 9.7460e+02, 1.60e-04),
(b'Pluto', b'Dwarf planet', 10, 2.3000e+03, 2.00e-03),
(b'Haumea', b'Dwarf planet', 11, 1.3000e+03, 7.00e-04),
(b'Makemake', b'Dwarf planet', 12, 1.4200e+03, 6.70e-04),
(b'Eris', b'Dwarf planet', 13, 2.3260e+03, 2.80e-03)],
dtype=[('planet', 'S10'), ('type', 'S20'), ('order', '<i4'), ('diameter', '<f4'), ('mass', '<f4')])
[36]:
solar_system_data['mass'].mean()
[36]:
23817.64
[37]:
import math
volume_of_earth = 4/3*math.pi*(solar_system_data['diameter'][3]/2.)**3.
volume_of_earth
[37]:
1086781292542.8892
Interfacing with Network
[43]:
# Retrieve a file
import urllib.request
url = "http://spatial-ecology.net/dokuwiki/lib/exe/fetch.php?media=wiki:python:hancock.zip"
fileName = "hancock.zip"
urllib.request.urlretrieve(url, fileName)
[43]:
('hancock.zip', <http.client.HTTPMessage at 0x152e96691390>)
[42]:
# Read data from a url - in this case recent USGS earthquakes
import urllib.request
url = "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_hour.csv"
earthquakes = urllib.request.urlopen(url)
# Read the first two earthquakes
print(earthquakes.readline())
print(earthquakes.readline())
# Iterate through the rest
for record in earthquakes:
print (record)
b'time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\n'
b'2019-05-31T17:17:57.190Z,34.0203323,-117.6094971,19.12,1.96,ml,13,134,0.1116,0.28,ci,ci38613536,2019-05-31T17:20:00.408Z,"4km SSE of Ontario, CA",earthquake,2.14,1.35,0.412,2,automatic,ci,ci\n'
b'2019-05-31T17:05:15.100Z,33.4946667,-116.795,1.59,0.49,ml,19,57,0.01821,0.23,ci,ci38613504,2019-05-31T17:08:56.963Z,"9km NE of Aguanga, CA",earthquake,0.45,0.67,0.127,10,automatic,ci,ci\n'
b'2019-05-31T16:58:50.720Z,33.4896667,-116.7948333,2.37,1.59,ml,56,36,0.02311,0.2,ci,ci38613480,2019-05-31T17:10:18.350Z,"8km NE of Aguanga, CA",earthquake,0.19,0.34,0.151,27,automatic,ci,ci\n'
b'2019-05-31T16:39:45.470Z,45.867,-111.361,4.94,1.57,ml,13,88,0.302,0.13,mb,mb80346349,2019-05-31T17:05:26.100Z,"2km WNW of Manhattan, Montana",earthquake,0.39,8.54,0.255,6,reviewed,mb,mb\n'
b'2019-05-31T16:37:53.690Z,33.5103333,-116.5108333,11.17,0.7,ml,25,82,0.04997,0.18,ci,ci38613456,2019-05-31T16:41:30.754Z,"16km ESE of Anza, CA",earthquake,0.3,0.53,0.161,16,automatic,ci,ci\n'
b'2019-05-31T16:36:34.830Z,34.0466667,-117.5038333,2.53,1.07,ml,31,64,0.06375,0.2,ci,ci38613448,2019-05-31T16:40:14.360Z,"4km NNW of Glen Avon, CA",earthquake,0.32,0.47,0.168,30,automatic,ci,ci\n'
b'2019-05-31T16:22:07.250Z,38.8214989,-122.8526688,2.31,1.13,md,20,64,0.006873,0.03,nc,nc73190601,2019-05-31T17:06:03.673Z,"10km WNW of The Geysers, CA",earthquake,0.23,0.56,0.03,4,automatic,nc,nc\n'
Zipping Files
[44]:
# Unzip a shapefile
import zipfile
zip = open("hancock.zip", "rb") # rb = 'read binary'; more info @ https://docs.python.org/2/tutorial/inputoutput.html
zipShape = zipfile.ZipFile(zip)
for fileName in zipShape.namelist():
out = open(fileName, "wb")
out.write(zipShape.read(fileName))
out.close()
[45]:
# Add a shapefile to a tar archive
import tarfile
tar = tarfile.open("hancock.tar.gz", "w:gz")
tar.add("hancock.shp")
tar.add("hancock.shx")
tar.add("hancock.dbf")
tar.close()
tar
[45]:
<tarfile.TarFile at 0x152e9608d3c8>
[46]:
# Extract a shapefile from a gzipped tar archive
import tarfile
tar = tarfile.open("hancock.tar.gz", "r:gz")
tar.extractall()
tar.close()
tar
[46]:
<tarfile.TarFile at 0x152e9608dac8>
Raster Images
Loading Files
[1]:
# Open a raster with gal
import gdal
raster = gdal.Open("SatImage.tif")
print (raster.RasterCount)
print (raster.RasterXSize)
print (raster.RasterYSize)
3
2592
2693
[30]:
# Numpy/gdalnumeric - Read an image, extract a band, save a new image
from osgeo import gdalnumeric
srcArray = gdalnumeric.LoadFile("SatImage.tif")
band1 = srcArray[0]
gdalnumeric.SaveArray(band1, "band1.jpg", format="JPEG")
[30]:
<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x7ff5036390f0> >
Rasterization
[1]:
# Rasterize a shapefile with PIL
import shapefile
from PIL import Image, ImageDraw
r = shapefile.Reader("hancock.shp")
xdist = r.bbox[2] - r.bbox[0]
ydist = r.bbox[3] - r.bbox[1]
iwidth = 400
iheight = 600
xratio = iwidth/xdist
yratio = iheight/ydist
pixels = []
for x,y in r.shapes()[0].points:
px = int(iwidth - ((r.bbox[2] - x) * xratio))
py = int((r.bbox[3] - y) * yratio)
pixels.append((px,py))
img = Image.new("RGB", (iwidth, iheight), "white")
draw = ImageDraw.Draw(img)
draw.polygon(pixels, outline="rgb(203, 196, 190)", fill="rgb(198, 204, 189)")
img.save("hancock.png")
[78]:
# Create a pdf with a map image
import fpdf
# PDF constructor:
# Portrait, millimeter units, A4 page size
pdf=fpdf.FPDF("P", "mm", "A4")
# create a new page
pdf.add_page()
# Set font: arial, bold, size 20
pdf.set_font('Arial','B',20)
# Create a new cell: 160 x 25mm, text contents, no border, centered
pdf.cell(160,25,'Hancock County Boundary', border=0, align="C")
pdf.image("hancock.png",25,50,150,160)
pdf.output('map.pdf','F')
[78]:
''
Raster Processing
[43]:
"""Swap bands in a raster satellite image"""
# Module within the GDAL python package
import gdalnumeric
# name of our source image
src = "FalseColor.tif"
# load the source image into an array
arr = gdalnumeric.LoadFile(src)
# swap bands 1 and 2 for a natural color image.
# We will use numpy "advanced slicing" to reorder the bands.
# Using the source image
gdalnumeric.SaveArray(arr[[1,0,2],:], "swap.tif", format="GTiff", prototype=src)
[43]:
<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x7ff501aacb10> >
[57]:
"""Perform a simple difference image change detection on a
'before' and 'after' image."""
import gdal, gdalnumeric
import numpy as np
# "Before" image
im1 = "before.tif"
# "After" image
im2 = "after.tif"
# Load before and after into arrays
ar1 = gdalnumeric.LoadFile(im1).astype(np.int8)
ar2 = gdalnumeric.LoadFile(im2)[1].astype(np.int8)
# Perform a simple array difference on the images
diff = ar2 - ar1
# Set up our classification scheme to try
# and isolate significant changes
classes = np.histogram(diff, bins=5)[1]
# The color black is repeated to mask insignificant changes
lut = [[0,0,0],[0,0,0],[0,0,0],[0,0,0],[0,255,0],[255,0,0]]
# Starting value for classification
start = 1
# Set up the output image
rgb = np.zeros((3, diff.shape[0], diff.shape[1],), np.int8)
# Process all classes and assign colors
for i in range(len(classes)):
mask = np.logical_and(start <= diff, diff <= classes[i])
for j in range(len(lut[i])):
rgb[j] = np.choose(mask, (rgb[j], lut[i][j]))
start = classes[i]+1
# Save the output image
gdalnumeric.SaveArray(rgb, "change.tif", format="GTiff", prototype=im2)
[57]:
<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x7ff503644a50> >
[ ]:
Shapefiles
[2]:
#Examine a shapefile with ogr
from osgeo import ogr
# open the shapefile
shp = ogr.Open("point.shp")
# Get the layer
layer = shp.GetLayer()
# Loop through the features and print information about them
for feature in layer:
geometry = feature.GetGeometryRef()
print (geometry.GetX(), geometry.GetY(), feature.GetField("FIRST_FLD"))
1.0 1.0 First
3.0 1.0 Second
4.0 3.0 Third
2.0 2.0 Fourth
0.0 0.0 Appended
[4]:
# Examine a shapefile with pyshp
import shapefile
shp = shapefile.Reader("point")
for feature in shp.shapeRecords():
point = feature.shape.points[0]
rec = feature.record[0]
print (point[0], point[1], rec)
1.0 1.0 First
3.0 1.0 Second
4.0 3.0 Third
2.0 2.0 Fourth
0.0 0.0 Appended
[28]:
"""Choropleth thematic map"""
import math
import shapefile
from PIL import Image, ImageDraw
'''convert geospatial coordinates to pixels'''
def world2screen(bbox, w, h, x, y):
minx,miny,maxx,maxy = bbox
xdist = maxx - minx
ydist = maxy - miny
xratio = w/xdist
yratio = h/ydist
px = int(w - ((maxx - x) * xratio))
py = int((maxy - y) * yratio)
return (px,py)
# Open our shapefile
inShp = shapefile.Reader("GIS_CensusTract_poly")
iwidth = 600
iheight = 400
# PIL Image
img = Image.new("RGB", (iwidth,iheight), (255,255,255))
# PIL Draw module for polygon fills
draw = ImageDraw.Draw(img)
# Get the population AND area index
pop_index = None
area_index = None
# Shade the census tracts
for i,f in enumerate(inShp.fields):
if f[0] == "POPULAT11":
# Account for deletion flag
pop_index = i-1
elif f[0] == "AREASQKM":
area_index = i-1
# Draw the polygons
for sr in inShp.shapeRecords():
density = sr.record[pop_index]/sr.record[area_index]
weight = min(math.sqrt(density/80.0), 1.0) * 50
R = int(205 - weight)
G = int(215 - weight)
B = int(245 - weight)
pixels = []
for x,y in sr.shape.points:
(px,py) = world2screen(inShp.bbox, iwidth, iheight, x, y)
pixels.append((px,py))
draw.polygon(pixels, outline=(255,255,255), fill=(R,G,B))
# Save the image to file
img.save("choropleth.png")
[70]:
from osgeo import ogr
shapefile_name = "tl_2012_us_cbsa.shp"
shapefile = ogr.Open(shapefile_name)
numLayers = shapefile.GetLayerCount()
print ("Shapefile contains %d layers\n" % numLayers)
for layerNum in range(numLayers):
layer = shapefile.GetLayer(layerNum)
spatialRef = layer.GetSpatialRef().ExportToProj4()
numFeatures = layer.GetFeatureCount()
print ("Layer %d has spatial reference %s" % (layerNum, spatialRef))
print ("Layer %d has %d features\n" % (layerNum, numFeatures))
for featureNum in range(10):
feature = layer.GetFeature(featureNum)
featureName = feature.GetField("NAME")
print ("Feature %d has name %s" % (featureNum, featureName))
Shapefile contains 1 layers
Layer 0 has spatial reference +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
Layer 0 has 955 features
Feature 0 has name Hilo, HI
Feature 1 has name Bozeman, MT
Feature 2 has name Magnolia, AR
Feature 3 has name Kingston, NY
Feature 4 has name Astoria, OR
Feature 5 has name Kennewick-Pasco-Richland, WA
Feature 6 has name Fayetteville-Springdale-Rogers, AR-MO
Feature 7 has name Seattle-Tacoma-Bellevue, WA
Feature 8 has name Ellensburg, WA
Feature 9 has name Pierre, SD
[49]:
from osgeo import ogr
shapefile_name = "tl_2012_us_cbsa.shp"
shapefile = ogr.Open(shapefile_name)
layer = shapefile.GetLayer(0)
feature = layer.GetFeature(2)
print ("Feature 2 has the following attributes:")
attributes = feature.items()
for key,value in attributes.items():
print (" %s = %s" % (key, value))
geometry = feature.GetGeometryRef()
geometryName = geometry.GetGeometryName()
print ()
print ("Feature's geometry data consists of a %s" % geometryName)
Feature 2 has the following attributes:
CSAFP = None
CBSAFP = 31620
GEOID = 31620
NAME = Magnolia, AR
NAMELSAD = Magnolia, AR Micro Area
LSAD = M2
MEMI = 2
MTFCC = G3110
ALAND = 1984070931
AWATER = 1809509
INTPTLAT = +33.2230377
INTPTLON = -093.2328433
Feature's geometry data consists of a POLYGON
[50]:
s = []
s.append(" " * 0)
[53]:
s = []
s.append(" " * 0)
s.append(geometry.GetGeometryName())
if geometry.GetPointCount() > 0:
s.append(" with %d data points" % geometry.GetPointCount())
if geometry.GetGeometryCount() > 0:
s.append(" containing:")
print ("".join(s))
POLYGON containing:
[54]:
from osgeo import ogr
def analyzeGeometry(geometry, indent=0):
s = []
s.append(" " * indent)
s.append(geometry.GetGeometryName())
if geometry.GetPointCount() > 0:
s.append(" with %d data points" % geometry.GetPointCount())
if geometry.GetGeometryCount() > 0:
s.append(" containing:")
print ("".join(s))
for i in range(geometry.GetGeometryCount()):
analyzeGeometry(geometry.GetGeometryRef(i), indent+1)
shapefile_name = "tl_2012_us_cbsa.shp"
shapefile = osgeo.ogr.Open(shapefile_name)
layer = shapefile.GetLayer(0)
feature = layer.GetFeature(55)
geometry = feature.GetGeometryRef()
analyzeGeometry(geometry)
POLYGON containing:
LINEARRING with 9193 data points
[55]:
# Examine a shapefile with pyshp
import shapefile
shp = shapefile.Reader("point")
for feature in shp.shapeRecords():
point = feature.shape.points[0]
rec = feature.record[0]
print (point[0], point[1], rec)
1.0 1.0 First
3.0 1.0 Second
4.0 3.0 Third
2.0 2.0 Fourth
0.0 0.0 Appended
[56]:
from osgeo import ogr
def findPoints(geometry, results):
for i in range(geometry.GetPointCount()):
x,y,z = geometry.GetPoint(i)
if results['north'] == None or results['north'][1] < y:
results['north'] = (x,y)
if results['south'] == None or results['south'][1] > y:
results['south'] = (x,y)
for i in range(geometry.GetGeometryCount()):
findPoints(geometry.GetGeometryRef(i), results)
shapefile_name = "tl_2012_us_cbsa.shp"
shapefile = ogr.Open(shapefile_name)
layer = shapefile.GetLayer(0)
feature = layer.GetFeature(55)
geometry = feature.GetGeometryRef()
results = {'north' : None,
'south' : None}
findPoints(geometry, results)
print ("Northernmost point is (%0.4f, %0.4f)" % results['north'])
print ("Southernmost point is (%0.4f, %0.4f)" % results['south'])
Northernmost point is (-93.9197, 33.0195)
Southernmost point is (-93.8752, 31.8442)
Sources : this material was adopted from the following sources
[ ]: