hass_scripts/waterconditions.py

#!/usr/bin/env python3
#
# Description:
#
# Simple web server that retrieves data from waterdata.usgs.gov
# and outputs it in JSON format for use in Home Assistant.
# Designed to be used as a rest template.
#
# Requirements:
# - Python 3.9+
# - requests
#
# Usage/installation:
#
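# Takes a single optional argument: the TCP port to listen on (default 8999).
# For example:
# /path/to/binary/waterconditions.py 8999
#
# The station to query is not given on the command line; it is part of the
# URL path requested from this server (see below). Each station is identified
# by an 8-digit code, which is part of the URL you'd use to view the
# information on the web, and is listed on the web page. For example,
# https://waterdata.usgs.gov/monitoring-location/14339000/
# is for measurement station 14339000.
# The web page title is:
# Rogue River at Dodge Bridge, Near Eagle Point, OR - 14339000
# This means that the URL path to request from this server would be:
# /river-14339000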
#
#
# Update the first line of this script to be the same python3 executable as
# your Home Assistant instance uses.
#
# To use the integration, add one or more river or lake sections to the sensor: section
# of your configuration.yaml file.
# ------------------------
#  - name: rogue_river_curr
#    platform: rest
#    resource: 'http://192.168.1.4:8999/river-14339000'
#    scan_interval: 1800
#    json_attributes:
#      - data
#    value_template: 'Rogue River status at Dodge Bridge'
#  - platform: template
#    sensors:
#      river_temp:
#        friendly_name: "River temperature"
#        device_class: temperature
#        value_template: '{{ state_attr("sensor.rogue_river_curr", "data")["watertemp"] | round(0) }}'
#      river_flow:
#        friendly_name: "River flow rate"
#        device_class:  volume_flow_rate
#        value_template: '{{ state_attr("sensor.rogue_river_curr", "data")["flow"] | round(0) }}'
#      river_height:
#        friendly_name: "River height"
#        device_class: distance
#        value_template: '{{ state_attr("sensor.rogue_river_curr", "data")["height"] | round(1) }}'
#  - name: lostcreek_lake_curr
#    platform: rest
#    resource: 'http://192.168.1.4:8999/lake-14335040'
#    scan_interval: 1800
#    json_attributes:
#      - data
#    value_template: 'Lost Creek Lake status'
#  - platform: template
#    sensors:
#      lake_level:
#        friendly_name: "Lake level"
#        device_class: distance
#        value_template: '{{ state_attr("sensor.lostcreek_lake_curr", "data")["level"] | round(1) }}'
# ------------------------
# Values returned by the script are in native units, which means:
# * level:     feet above sea level (lake)
# * flow:      cubic feet per second (river)
# * watertemp: degrees Celsius (river)
# * height:    feet (river, lake)
# Note that lakes may sometimes have both height and level. Height is a relative measurement,
# while level is an absolute (feet above sea level). It should always be the case
# that level-height for a lake is a constant (the zero point for the gauge).
#
# The URL should refer to the server and port on which you're running this script.
# The path for the URL must be either "lake-" or "river-" followed by the 8 digit number corresponding
# to the water sensor you want to query.
# You can find water sensors at https://waterdata.usgs.gov.
#
# You can use a regular Web browser to connect to this script; the page returned will contain the
# current values for your sensor in JSON format. This may be helpful in debugging your URL.
#
# You can use any value you want for name, but it must match the sensor specified in the value template.
# Similarly, you can name your river sensors anything you want.
#
# Scan interval should be relatively long, since the values aren't updated
# frequently. Minimum interval should be 600 seconds (every 10 minutes).
# However, since scan_interval doesn't always work, this web server will cache the retrieved values for you.
# It'll only query the USGS server if the retrieved value is at least request_interval seconds old.
# The default for this is 599, so the USGS server is only queried every 10 minutes. This keeps the
# load on the USGS server low, and prevents the USGS from banning you.
#
#==========================================================================
# Copyright 2025 Ethan L. Miller (code@ethanmiller.us)
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import sys,re,time
import json
import requests
from http.server import *

# Maximum age, in seconds, of a cached USGS response before get_conditions()
# fetches a fresh one.
request_interval = 599

# Default TCP port for the web server; replaced by the first command-line
# argument when the script is run directly.
listen_port = 8999

# For each station type accepted in the URL path ("river" or "lake"), maps a
# substring looked for in the lower-cased USGS variable name to the key used
# for that reading in the JSON output.
mappings = {
    'river': {
        'temperature': 'watertemp',
        'streamflow':  'flow',
        'height':      'height',
    },
    'lake': {
        'surface elevation': 'level',
        'height':            'height',
    },
}

# Per-station cache: station id -> (response object, time.time() of the fetch).
cached_requests = {}

def get_conditions(station, station_type='river', n_tries=4):
    """Return the latest USGS readings for one monitoring station.

    A successful response is cached per station and reused until it is more
    than ``request_interval`` seconds old, so repeated calls do not hit the
    USGS server each time.

    Args:
        station: Station identifier, as a string of digits.
        station_type: Key into ``mappings`` ('river' or 'lake') selecting
            which variables to extract.
        n_tries: Maximum number of HTTP attempts when a fresh fetch is needed.

    Returns:
        A dict mapping output keys (the values of ``mappings[station_type]``)
        to float readings. Empty when no usable response could be obtained or
        nothing in the response matched.

    Raises:
        KeyError: If ``station_type`` is not a key of ``mappings``.
    """
    # Fail fast on an unknown type instead of after a pointless network call.
    mp = mappings[station_type]

    cur_time = time.time()
    req = None
    new_req = False

    cached = cached_requests.get(station)
    if cached is not None:
        cached_req, req_time = cached
        time_delta = cur_time - req_time
        if time_delta <= request_interval:
            req = cached_req
            print(f'Reusing request for {station} {time_delta} seconds old')

    if req is None:
        url = f'https://waterservices.usgs.gov/nwis/iv/?format=json&sites={station}&siteStatus=all'
        for _ in range(n_tries):
            try:
                req = requests.get(url, timeout=3)
            except requests.RequestException:
                # Network-level failure (timeout, DNS, connection): retry.
                req = None
                continue
            if req.ok:
                new_req = True
                break

    result = {}
    if req is None or not req.ok:
        return result

    try:
        series_list = req.json()['value']['timeSeries']
    except (ValueError, KeyError, TypeError):
        # Body was not JSON or lacked the expected structure; report no data
        # and do not cache the unusable response.
        return result

    for series in series_list:
        try:
            variable_name = series['variable']['variableName'].lower()
            targets = [out_key for k, out_key in mp.items() if k in variable_name]
            if not targets:
                continue
            reading = float(series['values'][0]['value'][0]['value'])
        except (KeyError, IndexError, TypeError, ValueError, AttributeError):
            # A series with no current value (or an unexpected shape) must
            # not discard the readings from the other series.
            continue
        for out_key in targets:
            result[out_key] = reading

    if new_req:
        cached_requests[station] = (req, cur_time)
    return result

class WaterConditionsHandler(BaseHTTPRequestHandler):
    """HTTP handler that serves water conditions for a station as JSON.

    The request path must contain ``/river-<digits>`` or ``/lake-<digits>``.
    On success the response is ``200`` with a JSON body of the form
    ``{"data": {...}}``; otherwise it is ``404`` with the text ``Not found``.
    """

    # Raw string: '\d' inside a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    _PATH_RE = re.compile(r'/(river|lake)-(\d+)')

    def do_GET(self):
        """Look up the station named in the request path and send the reply."""
        response_code = 404
        content_type = 'text/plain'
        data = 'Not found'

        m = self._PATH_RE.search(self.path)
        if m is not None:
            station_type, station = m.groups()
            try:
                result = get_conditions(station, station_type)
            except Exception as err:
                # Best effort: any lookup failure is reported to the client
                # as "not found" rather than killing the request thread.
                self.log_error('Lookup failed for %s-%s: %r',
                               station_type, station, err)
                result = None
            if result:
                response_code = 200
                content_type = 'application/json'
                data = json.dumps({'data': result})

        self.send_response(response_code)
        self.send_header('content-type', content_type)
        self.end_headers()
        self.wfile.write(data.encode())

if __name__ == '__main__':
    # The optional first command-line argument overrides the default port.
    if len(sys.argv) > 1:
        listen_port = int(sys.argv[1])
    # An empty host binds to all interfaces. The context manager closes the
    # listening socket when serve_forever() exits (e.g. on Ctrl-C).
    with HTTPServer(('', listen_port), WaterConditionsHandler) as server:
        server.serve_forever()