#!/usr/bin/env python
"""Parse active NOAA NWS CAP advisories and output an ATOM feed"""

__author__ = 'Daniel Washburn'
__copyright__ = 'Copyright (c) 2013, 2014, All rights reserved.'
__license__ = 'http://opensource.org/licenses/BSD-3-Clause'

import contextlib
import getopt
import sys

try:
    # Not used by this script; the import is retained from the original
    # file but guarded because the module no longer exists on Python 3.13+.
    import cgi
except ImportError:
    cgi = None

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2

from lxml import etree
from lxml.etree import SubElement

ATOM_URI = 'http://www.w3.org/2005/Atom'
ATOM_NAMESPACE = {'atom': ATOM_URI}
CAP_URI = 'urn:oasis:names:tc:emergency:cap:1.1'
CAP_NAMESPACE = {'cap': CAP_URI}
CAP_SCHEMA_URL = 'http://docs.oasis-open.org/emergency/cap/v1.1/errata/approved/cap.xsd'
CAP_XSLT_URL = 'capatomcustom.xsl'
CAP_SCHEMA = etree.XMLSchema(etree.parse(CAP_SCHEMA_URL))
CAP_XSLT = xslt_transform = etree.XSLT(etree.parse(CAP_XSLT_URL))
ATOM_PARSER = etree.XMLParser(ns_clean=True, remove_comments=True)
CAP_PARSER = etree.XMLParser(ns_clean=True, remove_comments=True,
                             remove_blank_text=True, schema=CAP_SCHEMA)

# Title text of the placeholder entry the feed carries when nothing is active.
NO_ALERTS_TITLE = 'There are no active watches, warnings or advisories'

_USAGE = """nwscap.py [-h] -t (region|zone) -a area
    -h|--help   Print this message and exit
    -t|--type   Area type.
                must be either region or zone
    -a|--area   The area to report on
"""


def usage(stream=None):
    """Write the command-line help text to *stream* (default: stdout)."""
    out = sys.stdout if stream is None else stream
    out.write(_USAGE + "\n")


def _usage_error(message):
    """Report a command-line error on stderr and return exit status 2.

    Errors go to stderr so they never mix with the feed written to stdout.
    """
    sys.stderr.write(message + "\n")
    usage(sys.stderr)
    return 2


def _atom(tag):
    """Return *tag* qualified with the Atom namespace (Clark notation)."""
    return '{%s}%s' % (ATOM_URI, tag)


def _parse_url(url, parser):
    """Fetch *url* and parse the response body with *parser*.

    The document is downloaded with urllib rather than handed to
    ``etree.parse`` as a URL because libxml2's built-in HTTP client cannot
    retrieve ``https://`` resources.
    """
    with contextlib.closing(urlopen(url)) as response:
        return etree.parse(response, parser)


def _first_text(tree, path, namespaces, default=''):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches or the element has no text, so an
    optional element (e.g. a CAP alert without a headline) does not abort the
    whole feed.
    """
    matches = tree.xpath(path, namespaces=namespaces)
    if not matches or matches[0].text is None:
        return default
    return matches[0].text


def _feed_url(area_type, area):
    """Return the NWS alert feed URL for a ``region`` or ``zone`` area."""
    if area_type == "region":
        return 'https://alerts.weather.gov/cap/' + area + '.php?x=0'
    return 'https://alerts.weather.gov/cap/wwaatmget.php?x=' + area + '&y=0'


def _append_alert(out_feed, link):
    """Fetch the CAP alert at *link* and append it to *out_feed* as an entry.

    The alert is validated against the CAP schema while parsing (via
    ``CAP_PARSER``); the entry content is the alert rendered by ``CAP_XSLT``.
    """
    cap = _parse_url(link, CAP_PARSER)
    entry = SubElement(out_feed, _atom('entry'))
    SubElement(entry, _atom('id')).text = _first_text(
        cap, '/cap:alert/cap:identifier', CAP_NAMESPACE)
    author = SubElement(entry, _atom('author'))
    SubElement(author, _atom('name')).text = _first_text(
        cap, '/cap:alert/cap:sender', CAP_NAMESPACE)
    SubElement(entry, _atom('title')).text = _first_text(
        cap, '/cap:alert/cap:info/cap:headline', CAP_NAMESPACE)
    SubElement(entry, _atom('published')).text = _first_text(
        cap, '/cap:alert/cap:sent', CAP_NAMESPACE)
    SubElement(entry, _atom('link'), href=link)
    # tostring() emits ASCII bytes by default (non-ASCII becomes character
    # references); decode so the text is a real string on Python 3 as well.
    rendered = etree.tostring(CAP_XSLT(cap), pretty_print=True)
    SubElement(entry, _atom('content'),
               type="text/html").text = rendered.decode('ascii')


def build_feed(url):
    """Build the output Atom feed element from the NWS alert feed at *url*.

    Feed-level metadata (id, updated, author, title, link) is copied from the
    source feed; every active alert it links to becomes one entry.
    """
    in_feed = _parse_url(url, ATOM_PARSER)

    # NOTE(review): the root-element literal was blank in the file as
    # received (parsing it raises XMLSyntaxError); an Atom <feed> root is
    # reconstructed here from the module docstring and the child elements.
    out_feed = etree.Element(_atom('feed'), nsmap={None: ATOM_URI})
    SubElement(out_feed, _atom('id')).text = _first_text(
        in_feed, '/atom:feed/atom:id', ATOM_NAMESPACE)
    SubElement(out_feed, _atom('updated')).text = _first_text(
        in_feed, '/atom:feed/atom:updated', ATOM_NAMESPACE)
    author = SubElement(out_feed, _atom('author'))
    SubElement(author, _atom('name')).text = _first_text(
        in_feed, '/atom:feed/atom:author/atom:name', ATOM_NAMESPACE)
    SubElement(out_feed, _atom('title')).text = _first_text(
        in_feed, '/atom:feed/atom:title', ATOM_NAMESPACE)
    feed_links = in_feed.xpath('/atom:feed/atom:link/@href',
                               namespaces=ATOM_NAMESPACE)
    if feed_links:
        SubElement(out_feed, _atom('link'), href=feed_links[0])

    # get URLs of active alerts
    entry_titles = in_feed.xpath('/atom:feed/atom:entry/atom:title',
                                 namespaces=ATOM_NAMESPACE)
    alert_links = in_feed.xpath('/atom:feed/atom:entry/atom:link/@href',
                                namespaces=ATOM_NAMESPACE)
    # The placeholder "no alerts" entry links to a page, not a CAP document.
    if entry_titles and NO_ALERTS_TITLE in (entry_titles[0].text or ''):
        alert_links = []

    for link in alert_links:
        _append_alert(out_feed, link)
    return out_feed


def main(argv=None):
    """Run the command-line tool and return its exit status.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]``. Returns 0 on success (including ``--help``) and 2 on a
    usage error.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _args = getopt.gnu_getopt(argv, "ht:a:",
                                        ["help", "type=", "area="])
    except getopt.GetoptError as err:
        return _usage_error(str(err))

    area_type = None
    area = None
    for opt, value in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-t", "--type"):
            if value not in ("region", "zone"):
                return _usage_error("type must be one of: region, zone")
            area_type = value
        elif opt in ("-a", "--area"):
            area = value
        else:
            # getopt rejects unknown options itself, so this is a programming
            # error; raise explicitly because ``assert`` vanishes under -O.
            raise AssertionError("unhandled option")

    if not (area_type and area):
        return _usage_error("Both type and area must be specified")

    out_tree = etree.ElementTree(build_feed(_feed_url(area_type, area)))
    serialized = etree.tostring(out_tree, pretty_print=True, method='xml')
    print(serialized.decode('ascii'))
    return 0


if __name__ == '__main__':
    sys.exit(main())