Python Script to Monitor Shapefiles

In an ideal world, the primary, most up-to-date spatial layers are accessed and maintained on an enterprise spatial database. Here on the little blue planet, a lot of folks still like to edit and access data as shapefiles.

I can’t say I blame them. Despite doing spatial DBA work on Postgres/PostGIS and SDE/SQL Server for years, I have a soft spot for shapefiles. For small to mid-size layers they draw faster, they’re easy to consume, easy to exchange, and they’re the only open format ESRI has. Just the inexplicable delay between double-clicking an SDE connection and seeing a list of layers makes me want to kick something. But having shapefiles that are out of sync with your enterprise database server is bad for everybody.

Here’s a little Python script I threw together to keep track of shapefile changes, which we use to let us know when SDE layers could be out of sync. It reads a comma-delimited text file in which each line is formatted <sde layer>, <path to shapefile with no extension>, finds the most recent modification date of the .shp/.dbf files, and can email a report or write it to a file, highlighting shapefiles changed within a certain time period before the present date as well as shapefiles that have vanished (as they sometimes do).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import datetime
import html
import os
import pathlib
import smtplib
import time
import webbrowser
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# get file date
def modification_date(filename: str) -> float:
    """Return the last-modification time of *filename*.

    The value is whatever ``os.path.getmtime`` reports: seconds since the
    epoch as a float. ``OSError`` propagates if the file cannot be
    accessed.
    """
    return os.path.getmtime(filename)

# see if file exists
def file_exists(filename: str) -> bool:
    """Return True if *filename* refers to an existing path, else False.

    Thin wrapper around ``os.path.exists``.
    """
    return os.path.exists(filename)

# convert system time to date
def convert_time(t: float) -> str:
    """Format the epoch timestamp *t* as ``YYYY-MM-DD HH:MM:SS``.

    The conversion goes through ``datetime.datetime.fromtimestamp``, so
    the result is expressed in the machine's local time zone.
    """
    return datetime.datetime.fromtimestamp(t).strftime("%Y-%m-%d %H:%M:%S")

def process(list_of_lines):
    """Build one report record per ``<sde layer>, <shapefile path>`` line.

    Each non-blank line is split on commas; the first field is the SDE
    layer name and the second is the shapefile path without extension.
    Any further fields are ignored.

    Returns a list of ``[timestamp, sde_name, path, timestamp_text]``
    lists. ``timestamp`` is the newer of the ``.shp`` and ``.dbf``
    modification times. If the ``.shp`` is missing, ``timestamp`` is 0
    and ``timestamp_text`` is ``"File be gone!"``. The name and path are
    returned with surrounding whitespace (including the line's trailing
    newline) removed.

    Raises:
        ValueError: if a non-blank line has no comma-separated path field.
    """
    data_points = []
    for line_no, line in enumerate(list_of_lines, 1):
        # Blank lines (typically at the end of the input file) carry no
        # layer and are skipped rather than crashing on parts[1].
        if not line.strip():
            continue

        # split file line into array of sde name and shapefile path
        parts = line.split(",")
        if len(parts) < 2:
            raise ValueError(
                "line %d: expected '<sde layer>, <shapefile path>', got %r"
                % (line_no, line)
            )

        sde_name = parts[0].strip()
        # Strip once and reuse: the existence check and the timestamp
        # lookups must see exactly the same path, otherwise a space after
        # the comma makes every file look missing.
        base_path = parts[1].strip()
        shp_path = base_path + ".shp"
        dbf_path = base_path + ".dbf"

        if file_exists(shp_path):
            # get the latest time stamp for the shape file
            timestamp = modification_date(shp_path)
            # A shapefile without its .dbf still gets a report row based
            # on the .shp alone.
            if file_exists(dbf_path):
                timestamp = max(timestamp, modification_date(dbf_path))
            timestamp_rpt = convert_time(timestamp)
        else:
            timestamp_rpt = "File be gone!"
            timestamp = 0

        data_points.append([timestamp, sde_name, base_path, timestamp_rpt])
    return data_points

# create web page
def create_output(data, timep):
    """Render the report records as a standalone HTML page.

    Args:
        data: iterable of ``[timestamp, sde_name, path, timestamp_text]``
            records.
        timep: highlight window in seconds. Records whose timestamp is
            newer than ``now - timep`` get the ``highlight`` row class.

    Records whose timestamp is exactly 0 get the ``highlight2`` row class;
    all other rows are unstyled. Cell values are HTML-escaped so that
    names or paths containing ``&``, ``<`` or ``>`` cannot break the
    markup.

    Returns:
        The complete HTML document as a string.
    """
    # Evaluate the cutoff once so every row is judged against the same instant.
    cutoff = time.time() - timep

    rows = []
    for rec in data:
        if rec[0] > cutoff:
            row_open = "<tr class='highlight'>"
        elif rec[0] == 0:
            row_open = "<tr class='highlight2'>"
        else:
            row_open = "<tr>"
        # Column order in the report: last edited, SDE layer, shape file.
        cells = "".join(
            "<td>" + html.escape(str(value)) + "</td>"
            for value in (rec[3], rec[1], rec[2])
        )
        rows.append(row_open + cells + "</tr>")

    pieces = [
        "<html><head><style>table { border: 2px solid black} td {border: 1px solid gray } .highlight { background-color: yellow } .highlight2 { background-color: red }</style></head><body><table>",
        "<tr><th>LAST EDITED</th><th>SDE LAYER</th><th>SHAPE FILE</th></tr>",
        "".join(rows),
        "</table></body></html>",
    ]
    return "".join(pieces)

# create file
def write_file(f: str, data: str) -> int:
    """Write *data* to the file at path *f*, replacing any existing content.

    The file is opened in text mode ``"w"`` inside a ``with`` block so the
    handle is closed even if the write fails.

    Returns:
        0, always (kept for callers that check the return value).
    """
    with open(f, "w") as handle:
        handle.write(data)
    return 0

# Email
def mail(serverURL=None, sender='', to='', subject='', text=''):
    """Send *text* as an HTML email with a short plain-text alternative.

    Args:
        serverURL: SMTP host passed to ``smtplib.SMTP``.
        sender: address used for the ``From`` header and the envelope sender.
        to: address used for the ``To`` header and the envelope recipient.
        subject: subject line. When empty, the historical default
            ``"Link"`` is used.
        text: HTML body of the message.

    Errors from ``smtplib`` (connection or delivery failures) propagate
    to the caller.
    """
    msg = MIMEMultipart('alternative')
    # Honour the caller's subject; it used to be ignored in favour of "Link".
    msg['Subject'] = subject or "Link"
    msg['From'] = sender
    msg['To'] = to

    # In a multipart/alternative message the last part is the preferred
    # one, so the HTML report is attached after the plain-text fallback.
    msg.attach(MIMEText("Python Shapefile Checker Report", 'plain'))
    msg.attach(MIMEText(text, 'html'))

    # The context manager issues QUIT and closes the socket even when
    # sendmail raises.
    with smtplib.SMTP(serverURL) as mail_server:
        mail_server.sendmail(sender, to, msg.as_string())



#########################################################
# Customize script here
#########################################################
input_file = "shape_check.txt"  # path to the file containing input parameters, format: sde name, path to shape file (no file extension)
output_file = "shape_check.htm"  # where to put output file when finished (optional)
time_period = 30 * 86400  # number of days x 86400 to highlight (yellow if modified within last x days)
email_address = "you@yourcompany.com"  # where to send email report (optional) (NO HOTMAIL)
email_server = "email_server_name"  # SMTP host used to send the email report (optional)

# read the list of layers to check; `with` closes the file even on error
with open(input_file, "r") as f:
    data = f.readlines()

# process data
datareturn = process(data)

# sort return: newest timestamp first, missing files (timestamp 0) last
datareturn_srt = sorted(datareturn, reverse=True)

# create web page
output = create_output(datareturn_srt, time_period)

# move output to file and launch web browser (optional)
write_file(output_file, output)
# Open the report through the webbrowser module using a file:// URI.
# Running the file name as a shell command only works where the shell
# resolves file associations, and breaks on paths containing spaces.
webbrowser.open(pathlib.Path(output_file).resolve().as_uri())

# email result (optional)
mail(email_server, email_address, email_address, "Shape File Monitor", output)