4/24/201728.7k
rjpolackwich
Public

CatalogMasking

This notebook defines a function that covers an AOI with a minimal collection of the most recent Catalog Images. It returns the relevant Catalog ID's in a data structure that includes the polygon mask that defines the data contribution of that Catalog ID to the AOI coverage.


DigitalGlobe (Required Remote Kernel)
Some of the data for this notebook was provided by the DigitalGlobe remote kernel. You will need access to this remote kernel to be able to make full use of this notebook.

Cover an AOI with the most recent Catalog Images, and return the relevant shape masks

The function generate_catalog_masks defined in the following cell takes a bounding box (shapely polygon) and the results of a Vector Services query, and returns the most recent Catalog Image ID's and the masks that define the data contribution to the coverage.

from shapely.geometry import Polygon, MultiPolygon, shape, box
from shapely.ops import cascaded_union
from shapely.wkt import dumps, loads

from shapely import speedups
if speedups.available:
    speedups.enable()

from functools import partial
from operator import itemgetter
import warnings

from collections import OrderedDict

from itertools import *
import datetime as dt

from gbdxtools import CatalogImage, Interface



def unique_everseen(iterable, key=None):
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen = set()
    seen_add = seen.add
    if key is None:
        for element in ifilterfalse(seen.__contains__, iterable):
            seen_add(element)
            yield element
    else:
        for element in iterable:
            k = key(element)
            if k not in seen:
                seen_add(k)
                yield element

def get_catid(rec):
    return rec['properties']['attributes']['catalogID']

def get_date(rec):
    return rec['properties']['item_date']

def to_dtime(rec):
    _date = get_date(rec)
    return dt.datetime.strptime(_date, "%Y-%m-%dT%H:%M:%SZ")

def time_ordered(results):
    return sorted(results, key=to_dtime, reverse=True)

class IterWrapper(object):
    def __init__(self, it, fns=lambda x: x):
        self.it = it
        self.fns = fns
        
    def next(self):
        item = self.it.next()
        if isinstance(self.fns, (list, tuple)):
            return [fn(item) for fn in self.fns]
        return self.fns(item)
    
    def __iter__(self):
        return self

def iter_metachips(meta, imtype):
    for rec in meta['parts']:
        yield rec[imtype]
        
def stitch_polys(polygons):
    return cascaded_union(polygons)

def serialize_geo(shapeobj):
    return shapeobj.to_wkt()
        
# iwv8 = partial(iter_metachips, "WORLDVIEW_8_BAND")
# ipan = partial(iter_metachips, "PAN")

def _status_update(old, new, rec, db, polya, n, bba, ncats):
    print("Adding CatalogID {}\nAcquisition date: {}".format(get_catid(rec), get_date(rec)))
    try:
        incr = new - old
        perc = float(incr)/old
    except ZeroDivisionError:
        print("    Initializing working polygon")
    else:
        print("    Coverage increase: {}%".format(perc*100.0))
    finally:
        print("    Polygon Area: {}\n    Bbox Area: {}".format(polya, bba))
        print("CatID's consumed: {}/{}\nCatID's processed: {}\n".format(n, ncats, len(db.keys())))
        

def generate_catalog_masks(bbox, results, query=None, imtype='WORLDVIEW_8_BAND', cover_thresh=0.99):
    """
    Takes a shapely Polygon representing a bounding box and the results from a vector services query 
    and returns a dictionary with a "catalog_masks" key that is an ordered dictionary where keys are catalog id's
    hashing a dict w/ catalog geometry's (the catalog metadata is incorrect regarding this, so has to be built from 
    idaho geometry components) and the mask geometry as wkt serialized strings.
    """
    covered = False
    coverage= 0.0
    working_poly = None
    maskdb = {"from_aoi": bbox.to_wkt(),
              "vs_query": query,
              "catalog_masks": OrderedDict(),
             }
    ires = unique_everseen(time_ordered(results), key=get_catid)
    bbox_area = bbox.area
    ncats = len(set([get_catid(rec) for rec in results]))
    status_update = partial(_status_update, bba=bbox_area, ncats=ncats)
    n = 1
    
    while not covered:
        try:
            rec = ires.next()
        except StopIteration:
            warnings.warn("Catalog shapes coverage NOT comprehensive")
            print("Finished with insufficient coverage")
            break
        
        # Instantiate CatalogImage to get component metadata
        cat_id = get_catid(rec)
        cat_img = CatalogImage(cat_id)
        meta = cat_img.metadata # cache this
        imeta = iter_metachips(meta, imtype)
        
        # Get the component polygons:
        pgons = list(IterWrapper(imeta, fns=itemgetter("geometry")))
        # Fast-union of polygons:
        catpoly = stitch_polys(pgons)
        
        # Get aoi intersection
        catinbox = catpoly.intersection(bbox)
        if not catinbox.is_empty: # We shouldn't trust that this is never empty even though it should be
            if working_poly:
                catmask = catinbox.difference(working_poly)
            else:
                catmask = catinbox
                working_poly = catinbox
            
            if not catmask.is_empty:  # Check for redundancy
                # Update db
                maskdb['catalog_masks'][cat_id] = {"geometry": catpoly.to_wkt(), "mask": catmask.to_wkt()}
                # Update working_poly
                working_poly = stitch_polys([catinbox, working_poly])

                # Update coverage
                wp_area = working_poly.area
                old_coverage = coverage
                coverage = wp_area/bbox_area
                status_update(old_coverage, coverage, rec, maskdb['catalog_masks'], wp_area, n)

                if coverage >= cover_thresh:
                    covered = True
            else:
                print("Skipping CatID {}: redundant fill\n".format(cat_id))
        else:
            print("Skipping CatID {}: does not intersect AOI\n".format(cat_id))
            
        n += 1
                
    return (maskdb, working_poly)
            
        
gbdx = Interface()

# bounds = [-108.5,36.62,-99.54,41.88]
bounds = [-105.1098, 39.6142, -104.6177, 39.9141] # Around Denver
bbox = box(*bounds)
wkt = dumps(bbox)

query = "item_type:DigitalGlobeProduct"
query += " AND item_type:1BProduct AND (item_type:WV03_VNIR or item_type:W02)"

results = gbdx.vectors.query(wkt, query=query)
print len(results), 'images found'
mask, wp = generate_catalog_masks(bbox, results, query)
Adding CatalogID 1040010028318F00
Acquisition date: 2017-01-25T18:04:05Z
    Initializing working polygon
    Polygon Area: 0.0322909352494
    Bbox Area: 0.14758079
CatID's consumed: 1/42
CatID's processed: 1

Adding CatalogID 104001002743EA00
Acquisition date: 2017-01-01T18:15:51Z
    Coverage increase: 67.2823109219%
    Polygon Area: 0.0540170227035
    Bbox Area: 0.14758079
CatID's consumed: 2/42
CatID's processed: 2

Adding CatalogID 1040010026681000
Acquisition date: 2016-12-31T18:01:13Z
    Coverage increase: 8.17982897888%
    Polygon Area: 0.0584355227801
    Bbox Area: 0.14758079
CatID's consumed: 3/42
CatID's processed: 3

Skipping CatID 10400100273E5000: redundant fill

Skipping CatID 1040010024CC8D00: redundant fill

Skipping CatID 10400100264D3A00: redundant fill

Adding CatalogID 1040010024098200
Acquisition date: 2016-10-26T18:31:56Z
    Coverage increase: 1.09915677295%
    Polygon Area: 0.0590778207866
    Bbox Area: 0.14758079
CatID's consumed: 7/42
CatID's processed: 4

Skipping CatID 10400100228DB100: redundant fill

Adding CatalogID 1040010022B49300
Acquisition date: 2016-10-18T18:05:28Z
    Coverage increase: 0.362589893061%
    Polygon Area: 0.0592920309938
    Bbox Area: 0.14758079
CatID's consumed: 9/42
CatID's processed: 5

Skipping CatID 10400100208B7800: redundant fill

Skipping CatID 10400100203F1300: redundant fill

Adding CatalogID 104001001F617800
Acquisition date: 2016-08-11T18:02:20Z
    Coverage increase: 43.6854199842%
    Polygon Area: 0.0851940037506
    Bbox Area: 0.14758079
CatID's consumed: 12/42
CatID's processed: 6

Adding CatalogID 104001001F505200
Acquisition date: 2016-08-11T18:01:57Z
    Coverage increase: 57.8336477403%
    Polygon Area: 0.134464803776
    Bbox Area: 0.14758079
CatID's consumed: 13/42
CatID's processed: 7

Skipping CatID 104001001E6EBF00: redundant fill

Skipping CatID 104001001B79CB00: redundant fill

Skipping CatID 104001001AA24A00: redundant fill

Skipping CatID 104001001761CB00: redundant fill

Skipping CatID 104001001405DA00: redundant fill

Adding CatalogID 1040010014423A00
Acquisition date: 2015-11-15T18:14:48Z
    Coverage increase: 9.75421512262%
    Polygon Area: 0.14758079
    Bbox Area: 0.14758079
CatID's consumed: 19/42
CatID's processed: 8

Draw the coverage

!conda install -n juno -c conda-forge descartes -y -q
import matplotlib.pyplot as plt
from descartes import PolygonPatch
import seaborn as sns

#import matplotlib.animation as animation

def buffer_lims(lmin, lmax, r = 1.5):
    ldiff = lmax - lmin
    bufmag = ldiff * r
    return (lmin - bufmag/2.0, lmax + bufmag/2.0)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.hold(True)
cpal = sns.color_palette("Set2", len(mask['catalog_masks']))
cids = mask['catalog_masks'].keys()
x, y = bbox.exterior.xy

# def init():

#     _bbox, = ax.plot(x, y, color='k', linewidth=3, zorder=100)

#     xmin, xmax = ax.get_xlim()
#     nxmin, nxmax = buffer_lims(xmin, xmax)
#     ymin, ymax = ax.get_ylim()
#     nymin, nymax = buffer_lims(ymin, ymax)

#     ax.set_xlim(nxmin, nxmax);
#     ax.set_ylim(nymin, nymax);
#     ax.hold(True)
#     return _bbox, 


# def update(ind):
#     cid = cids[ind]
#     catpoly = loads(mask['catalog_masks'][cid]['geometry'])
#     x, y = catpoly.exterior.xy
#     line, = ax.plot(x, y, color=cpal[ind], linewidth=2, alpha=0.65)
#     maskpoly = loads(mask['catalog_masks'][cid]['mask'])
#     mpatch = PolygonPatch(maskpoly, fc=cpal[ind], ec=cpal[ind], alpha=0.65)
#     patch = ax.add_patch(mpatch)
#     return [line, patch,]

for ind, cid in enumerate(mask['catalog_masks']):
    catpoly = loads(mask['catalog_masks'][cid]['geometry'])
    x, y = catpoly.exterior.xy
    ax.plot(x, y, color=cpal[ind], linewidth=2, alpha=0.65)
    maskpoly = loads(mask['catalog_masks'][cid]['mask'])
    mpatch = PolygonPatch(maskpoly, fc=cpal[ind], ec=cpal[ind], alpha=0.65)
    patch = ax.add_patch(mpatch)

# anim = animation.FuncAnimation(fig, update, frames=range(len(cids)), init_func=init,
#                                interval=1500, blit=True, repeat=False)

# plt.show()

A weird thing I discovered, shown below

cat_img = CatalogImage("104001002809D400")
parts = cat_img.metadata['parts']
pgons = [rec['WORLDVIEW_8_BAND']['geometry'] for rec in parts]
poly_from_idaho_parts = cascaded_union(pgons)

ipe_meta = cat_img.ipe_metadata
poly_from_ipe_meta = loads(ipe_meta['image']['imageBoundsWGS84'])

# Why these are different?
print(cat_geo.area)
print(cu.area)
0.0237375870154
0.191022025198

Random utility functions

# Collection of random functions

from collections import defaultdict

def relative_overlap(obj, shapes):
    if isinstance(obj, int):
        ind = obj
        s0 = shapes[obj]
    else:
        ind = "CUSTOM"
        s0 = obj
    s0 = s0.geoms[0]
    a0 = s0.area
    print("Chosen shape {} has area {}\n".format(ind, s0.area))
    for _ind, s in enumerate(shapes[:]):
        s = s.geoms[0]
        print("Shape {} has area {}".format(_ind, s.area))
        sdiff = s0.intersection(s)
        print("Intersection reprs {} percent of s0, {} percentof s{} \n".format(sdiff.area/s0.area, sdiff.area/s.area, _ind))
    

def group_vectors_by_catid(catid, vectors, type_filter="Multi"):    
    vecs = []
    for ind, vec in enumerate(vectors):
        attrs = vec['properties']['attributes']
        if attrs['catalogID'] == catid and attrs['bands'] == type_filter:
            vecs.append((ind, vec))
    return vecs

def shapes_from_vectors(vectors):
    return [shape(vec['geometry']) for ind, vec in vectors]

def shapes_from_catim(catim, type_filter="WORLDVIEW_8_BAND"):
    assert type_filter in ["PAN", "WORLDVIEW_8_BAND"]
    shapes = list()
    for part in catim.metadata['parts']:
        if type_filter and type_filter in part:
            p = part[type_filter]
            shapes.append({'shape': p['geometry'],
                            'iid': p['properties']['idahoImageId'],
                            'date': p['properties']['acquisitionDate']})
            
    return shapes

def shapes_match(s0, s1, thresh=0.99):
    sdiff = s0.intersection(s1)
    if sdiff.area/s0.area >= thresh and sdiff.area/s1.area >= thresh:
        return True
    return False
            
def map_vectors_to_idahoids(catid, vectors, cim=None, **kwargs):
    iid_map = {}
    if not cim:
        cim = CatalogImage(catid)
    cim_map = shapes_from_catim(cim, **kwargs)
    vecs = group_vectors_by_catid(catid, vectors)
    vshapes = shapes_from_vectors(vecs)
    
    for ind, vec in vecs:
        for _ind, catim in enumerate(cim_map):
            if shapes_match(vshapes[ind], cim_map[_ind]['shape']):
                iid_map[ind] = vec.update(cim_map[_ind])
    
    return iid_map

def get_vec_iid(ind, vecshapes):
    iid_matches = []
    vshape = vecshapes[ind]
    for i, im in enumerate(cim_map):
        if shapes_match(vshape, im['shape']):
            iid_matches.append(im['iid'])
    return iid_matches