## Various functions for comparing episodes, for searching for matches to ## one episode among other episodes, for blending episodes, and for incorporating ## features from one episode into another. Most of what is here comes from ## Homework 4 and is not needed for Homework 6. from episode import * ## Parameters # How close a match should be MATCH_THRESH = .15 # What number of episodes should match MATCH_N = 5 # Proportion of feature values that need to agree in blend VAL_MATCH_PROP = .7 # How far apart numbers can be and still match INT_MATCH_DEVIATION = .2 def matches(v1, v2): '''Do feature values v1 and v2 match?''' if isinstance(v1, Individual) and isinstance(v2, Individual): if v1 == v2: return 1.0 else: return similarity(v1, v2) elif isinstance(v1, Variable) or isinstance(v2, Variable): # This is wrong; it has to check a list of bindings return 1.0 elif isinstance(v1, dict) or isinstance(v2, dict): return similarity(v1, v2) elif isinstance(v1, int) and isinstance(v2, int): return abs(v1 - v2) / float(max(v1, v2)) < INT_MATCH_DEVIATION elif v1 == v2: return 1.0 else: return 0.0 def feature_weight(feature, feature_weights = {}): '''Get the relative weight associated with feature; used in figuring similarity.''' if feature_weights: return feature_weights[feature] else: return 1 def similarity(dict1, dict2, feature_weights = {}): '''Number between 0 and 1, similarity of dictionaries.''' if not dict1 or not dict2: return 0.0 else: keys1, keys2 = set(dict1.keys()), set(dict2.keys()) # Sum the weights of all features n_all_keys = sum(feature_weight(f, feature_weights) for f in keys1 | keys2) shared_keys = keys1 & keys2 total = 0.0 for k in shared_keys: # For each of the shared features, increment for matches # scaling by the weight of the feature total += matches(dict1[k], dict2[k]) * feature_weight(k, feature_weights) # Scale by the total number of features return total / n_all_keys def process(episode, episodes): '''Find the best matching episodes, blend them, and incorporate results into episode.''' return incorporate(episode, blend(get_best_matching(episode, episodes))) def incorporate(episode, retrieved): '''Incorporate feature values in retrieved into episode, giving priority to episode.''' for retrfeat, retrval in retrieved.iteritems(): origval = episode.get(retrfeat, False) if not origval: episode[retrfeat] = retrval elif isinstance(origval, dict) and not isinstance(origval, Individual): episode[retrfeat] = incorporate(origval, retrval) # Otherwise don't modify the original features return episode def blend(episodes, nEps = False): '''Blend the features in episodes, ignoring features where's no dominant feature.''' n = nEps or len(episodes) dct = dict() all_feats = union(set(e.keys()) for e in episodes) for f in all_feats: values = filter(None, [e.get(f, False) for e in episodes]) v = get_blended_value(values, n) if v: dct[f] = v return dct def get_blended_value(values, n): '''Return a value if one predominates; otherwise False.''' unique = remove_dups(values) if any(lambda v: isinstance(v, dict), values): return blend(values, n) else: for v in unique: if values.count(v) >= n * VAL_MATCH_PROP: return v return None def sim_cmp(d1, d2, d3): '''Comparison function for sorting; compares similiarity of d1 to d2 and d3.''' sim2 = similarity(d1, d2) sim3 = similarity(d1, d3) if sim2 < sim3: return 1 elif sim2 > sim3: return -1 else: return 0 def sort_by_sim(dicts, standard): '''Sort the list of dicts by their similarity to standard.''' return dicts.sort(lambda x, y: sim_cmp(standard, x, y)) def get_best_matching(episode, episodes): '''The MATCH_N episodes that best match episode.''' eps = episodes[:] eps.sort(lambda x, y: sim_cmp(episode, x, y)) return eps[:MATCH_N] def mean_similarity(episode, episodes, feature_weights = {}): '''Mean similarity between episode and episodes.''' return sum([similarity(episode, e, feature_weights) for e in episodes]) / len(episodes)