Friday, April 21, 2017

A More Robust Merge

Merging various feature classes into a single feature class is a common GIS task, and any GIS newbie knows that ArcGIS has a Merge tool to get the job done. The tool works well, with one small hiccup. If two features classes being merged have fields of the same name but incompatible types (integer and string, say) the tool will fail. This was probably a conscious decision by the developers of ArcGIS, but I would rather have a merge that makes the best choice for a field type when a conflict arises. In the below function, a list of feature classes is merged and fields are evaluated to determine the proper field type for the output feature class. For example, when two feature classes have the same field name but one is text while the other is numeric, the script creates a text field and converts values from the numeric field to text. Date fields that conflict with other field types are converted to strings. Conflicts between double fields and integer fields default to double. The function's mergeFcs input is a python list of input feature classes. The outFc input is the full path of the output feature class.

The code:

import arcpy
import os

def Merge (mergeFcs, outFc):

    """
    Merge feature classes with field type discrepancies
    emilharold@gmail.com
    """
    
    print "iterating fields to determine field types"
    stringFlds = []
    lIntFlds = []
    floatFlds = []
    doubleFlds = []
    dateFlds = []
    numFcs = len (mergeFcs)
    digits = len (str (numFcs))
    lenDi = {}
    placeDi = {}
    fObDi = {}
    place = 0
    
    for i, fc in enumerate (mergeFcs, 1):
        if i != 1 and not (i - 1) % 10:
            print
        print str (i).zfill (digits) + "/" + str (numFcs),
        for fld in arcpy.ListFields (fc):
            fType = fld.type
            fName = fld.name
            if fType in ("Geometry",
                         "Guid",
                         "GlobalID",
                         "OID") or fld.required:
                continue
            fObDi [fName] = fld
            if not fName in placeDi.values ():
                place += 1
                placeDi [place] = fName
            if fType == "String":
                stringFlds += [fName]
                fLength = fld.length
                if not fName in lenDi:
                    lenDi [fName] = fLength
                elif fLength > lenDi [fName]:
                    lenDi [fName] = fLength
            elif fType == "Integer":
                if fName in dateFlds:
                    stringFlds += [fName]
                    fLength = 50
                    if not fName in lenDi:
                        lenDi [fName] = fLength
                    elif fLength > lenDi [fName]:
                        lenDi [fName] = fLength
                else:  
                    lIntFlds += [fName]
            elif fType == "Single":
                if fName in dateFlds:
                    stringFlds += [fName]
                    fLength = 50
                    if not fName in lenDi:
                        lenDi [fName] = fLength
                    elif fLength > lenDi [fName]:
                        lenDi [fName] = fLength
                else:
                    floatFlds += [fName]
            elif fType == "Double":
                if fName in dateFlds:
                    stringFlds += [fName]
                    fLength = 50
                    if not fName in lenDi:
                        lenDi [fName] = fLength
                    elif fLength > lenDi [fName]:
                        lenDi [fName] = fLength
                else:
                    doubleFlds += [fName]
            elif fType in ("Date", "Blob", "Raster"):
                if fName in lIntFlds + floatFlds + doubleFlds:
                    stringFlds += [fName]
                    fLength = 50
                    if not fName in lenDi:
                        lenDi [fName] = fLength
                    elif fLength > lenDi [fName]:
                        lenDi [fName] = fLength
                else:
                    dateFlds += [fName]
                    

    print
    stringFlds = set (stringFlds)
    lIntFlds = set (lIntFlds)
    floatFlds = set (floatFlds)
    doubleFlds = set (doubleFlds)

    print "creating feature class"
    outLoc, outName = os.path.split (outFc)
    checkFc = mergeFcs [0]
    sr = arcpy.Describe (checkFc).spatialReference
    shapeType = arcpy.Describe (checkFc).shapeType.upper ()
    arcpy.CreateFeatureclass_management (outLoc, outName,
                                         shapeType,
                                         spatial_reference = sr)
    lenFlds = str (len (placeDi.keys ()))
    digits = len (lenFlds)
    print "adding fields"
    for i, place in enumerate ((placeDi.keys ()), 1):
        if i != 1 and not (i - 1) % 5:
            print
        print str (i).zfill (digits) + " of " + lenFlds, "",
        fName = placeDi [place]
        fld = fObDi [fName]
        if fName in stringFlds:
            arcpy.AddField_management (outFc, fName, "TEXT",
                                       field_length =
                                       lenDi [fName])
        elif fName in floatFlds:
            arcpy.AddField_management (outFc, fName, "FLOAT")
        elif fName in doubleFlds:
            arcpy.AddField_management (outFc, fName, "DOUBLE")
        elif fName in lIntFlds:
            arcpy.AddField_management (outFc, fName, "LONG")
        else:
            arcpy.AddField_management (outFc, fName, fld.type)

    print
    print "iterating feature classes"
    outFlds = ["SHAPE@"] + [f.name for f in
                            arcpy.ListFields (outFc)
                            if f.type not in
                            ("Geometry", "Guid", "OID")]
    inputRows = [None] * len (outFlds)
    with arcpy.da.InsertCursor (outFc, outFlds) as iCurs:
        for i, fc in enumerate (mergeFcs, 1):
            print i, numFcs, "copying rows from", os.path.basename (fc)
            inFlds = ["SHAPE@"] + [f.name for f
                                   in arcpy.ListFields (fc)
                                   if f.name in outFlds]
            with arcpy.da.SearchCursor (fc, inFlds) as sCurs:
                for inRow in sCurs:
                    row = list (inputRows)
                    for n, fld in enumerate (inFlds):
                        if inRow [n]:
                            if fld in stringFlds:
                                try: row [outFlds.index
                                          (fld)] = str (inRow [n])
                                except: row [outFlds.index
                                             (fld)] = inRow [n]
                                
                            else:
                                row [outFlds.index
                                     (fld)] = inRow [n]
                    row [0] = inRow [0]
                    iCurs.insertRow (row)
    return outFc
profile for Emil Brundage at Geographic Information Systems Stack Exchange, Q&A for cartographers, geographers and GIS professionals

1 comment:

  1. I am really impressed with your writing skills and also with the layout on your weblog. Is this a paid theme or did you modify it yourself? Either way keep up the nice quality writing, it is rare to see a nice blog like this one nowadays..solve your afp problems

    ReplyDelete