## The world consists of Individuals and Episodes, each a dict of feature-value pairs.
##
## Values for features can be primitive values (strings like 'red'), integers,
## Individuals, sets, or embedded feature-value dicts. Features never have boolean
## values.
##
## A feature that is missing from a dict is considered to be False; that is, there is
## no distinction between "don't care" and "no". The function used to get the value
## for a given feature in an Individual, Episode, or other dict (get_value) is
## >>> some_dict.get(feature, False)
##
## The similarity between two Individuals or two Episodes is a number between 0 and 1.
## It is the ratio of the number of matching features to the total number of features
## that are specified in the two Individuals or Episodes.
##
## For now, all Episodes are examples of utterances, which have as their features
## 'speaker', 'hearer', 'sound' (the word form), 'referent'.
##
## (For Homework 6, we will be leaving out 'speaker' and 'hearer' to simplify things.)
##
## This module also includes procedures for variablization of Episodes and for generation
## of mini-categories, which are blends of Episodes.

# Imported explicitly because select() calls random.randrange / random.choice.
import random

# NOTE(review): the module-level similarity() called by Entity.similarity is not
# defined in this file; presumably it is provided by utils -- confirm.
from utils import *

# The long-term memory
EPISODES = []
INDIVIDUALS = []

# Marks a variable path: a template value that is a tuple whose first element is VAR
VAR = '<-'


class Entity(dict):
    '''A dict that is either an individual or an episode.'''

    def get_value(self, feature):
        '''Return the value stored for feature, or False if there is none.'''
        return self.get(feature, False)

    def similarity(self, entity):
        '''Similarity between this entity and another; 1.0 if they are the same object.

        For distinct objects the module-level similarity() function is used.
        '''
        if self is entity:
            return 1.0
        return similarity(self, entity)


class Individual(Entity):
    '''A dict with values for different features, representing a particular thing.'''

    def __init__(self, **args):
        '''Initialize with optional feature-value pairs; always remember.'''
        dict.__init__(self, args)
        # The name records how many Individuals existed when this one was created.
        self.name = 'indiv' + str(len(INDIVIDUALS))
        # Add the individual to the memory.
        INDIVIDUALS.append(self)

    def __str__(self):
        '''Print name: 'indiv' plus the number of Individuals at creation time.'''
        return self.name


class Episode(Entity):
    '''A dictionary with values for different roles in an episode; for example an utterance.'''

    def __init__(self, remember=True, **args):
        '''Initialize with optional feature-value pairs; remember by default.

        remember: if true, the new Episode is appended to EPISODES.
        '''
        dict.__init__(self, args)
        # The name records how many Episodes were in memory at creation time.
        self.name = 'Ep' + str(len(EPISODES))
        # Add the episode to the memory.
        if remember:
            EPISODES.append(self)

    def __str__(self):
        '''Print name: each feature and the print name of its value, one per line.

        An Episode with no features prints as the empty string.
        '''
        return ''.join(Episode.feat2string(f, v, 0) for f, v in self.items())

    ### Two static methods for pretty printing

    @staticmethod
    def feat2string(feat, value, indent):
        '''Return the printed form of one feature-value pair, indented by indent spaces.

        Atomic values (see is_object) and any other non-dict value are printed
        on the same line as the feature; dict values are printed on the
        following lines, indented two further spaces.
        '''
        pad = ' ' * indent
        if is_object(value) or not isinstance(value, dict):
            return pad + feat + ': ' + str(value) + '\n'
        return pad + feat + ':\n' + Episode.dict2string(value, indent + 2)

    @staticmethod
    def dict2string(value, indent):
        '''Return the printed form of every feature-value pair in the dict value.'''
        return ''.join(Episode.feat2string(feat, val, indent)
                       for feat, val in value.items())


class EpisodeSet(list):
    '''A set (really a list) of Episodes, a primitive sort of Category.'''

    def __init__(self, *args):
        '''Initialize with the Episodes given as positional arguments.'''
        list.__init__(self, args)

    def __str__(self):
        '''Print name of each constituent Episode, separated by newlines.'''
        return '\n'.join(str(ep) for ep in self)


class Variable(object):
    '''Variables within Episodes.'''

    # Prefix character for every variable name
    SYM = '$'
    # Number of Variables created so far; used to give each one a unique name
    VARIABLES = 0

    def __init__(self, prefix, binding=None):
        '''Create a variable named SYM + prefix + running count, with an optional binding.'''
        self.name = Variable.SYM + prefix + str(Variable.VARIABLES)
        self.binding = binding
        Variable.VARIABLES += 1

    def __str__(self):
        '''Print name is the variable's name.'''
        return self.name


##############################
## VARIABLIZATION OF EPISODES
##############################

def variablize(episode):
    '''Produce a new episode replacing common elements in the original with variables.

    If no value occurs more than once, the original episode itself is returned.
    The new episode is not added to EPISODES.
    '''
    dups = []
    discover_vars(episode, [], dups)
    if not dups:
        return episode
    new_ep = Episode(remember=False)
    subs_vars(new_ep, episode, dups)
    return new_ep


def find_var(binding, variables):
    '''Find the variable with a given binding in a list of variables; None if absent.'''
    for v in variables:
        if v.binding == binding:
            return v
    return None


def subs_vars(new_ep, old_ep, variables):
    '''Substitute variables from old_ep into new_ep.

    Every feature of old_ep is copied into new_ep. An atomic value that is the
    binding of one of the variables is replaced by that variable; embedded
    dicts are copied recursively into plain dicts; anything else is copied
    unchanged.
    '''
    for feat, value in old_ep.items():
        if is_object(value):
            var = find_var(value, variables)
            new_ep[feat] = value if var is None else var
        elif isinstance(value, dict):
            new_value = dict()
            subs_vars(new_value, value, variables)
            new_ep[feat] = new_value
        else:
            # Neither atomic nor a dict (e.g. a set): nothing to recurse into.
            new_ep[feat] = value


def discover_vars(ep, all_vars=None, var_dups=None):
    '''Find variables in ep, storing duplicates in var_dups.

    all_vars collects one Variable per distinct atomic value; each time a value
    is seen again its Variable is appended to var_dups. Both lists are
    modified in place when supplied; fresh lists are used otherwise.
    Returns var_dups.
    '''
    # Fresh lists per call: a literal [] default would be shared between calls.
    if all_vars is None:
        all_vars = []
    if var_dups is None:
        var_dups = []
    for feat, value in ep.items():
        if is_object(value):
            var = find_var(value, all_vars)
            if var:
                var_dups.append(var)
            else:
                all_vars.append(Variable(feat, binding=value))
        elif isinstance(value, dict):
            discover_vars(value, all_vars, var_dups)
    return var_dups


#####################
## MINICATS
#####################

def make_minicat(ls, bindings=None):
    '''Produce an Episode that is a blend of the Episodes in the list (or EpisodeSet).

    A feature of the first Episode is kept when its value is atomic and every
    other Episode has the same (true) value for it, or when its value is a
    dict and the blend of all the Episodes' values for it is non-empty.
    Returns None if ls is empty or contains anything that is not a dict.

    bindings is not used by the blending itself; it is only passed along to
    the recursive calls.
    '''
    if not ls or not all(isinstance(x, dict) for x in ls):
        return None
    minicat = Episode(remember=False)
    rest = ls[1:]
    # For each feat/val pair in the first Episode
    for feat, val in ls[0].items():
        if is_object(val):
            # A missing (or otherwise false) value in another Episode counts
            # as disagreement.
            others = (e.get(feat) for e in rest)
            if all(other and other == val for other in others):
                # All of the dicts agreed on the value for feat
                minicat[feat] = val
        else:
            # Value is a dict, blend values of all Episodes for feat
            val_blend = make_minicat([d.get(feat) for d in ls], bindings)
            if val_blend:
                minicat[feat] = val_blend
    # Replace same values with variables
    return variablize(minicat) or minicat


def variablize_minicat(minicat, feat_values=None):
    '''For features with many different values, replace value with variable.
    Keep others the same.

    minicat is modified in place. "Many" means more than 2 values according
    to get_feat_values.
    '''
    # Iterate over a snapshot because values are reassigned inside the loop.
    for f, v in list(minicat.items()):
        if is_object(v):
            if get_feat_values(f, feat_values) > 2:
                minicat[f] = Variable(f)
        elif isinstance(v, dict):
            variablize_minicat(v, feat_values)


def get_feat_values(feat, feat_values=None):
    '''Number of different values a feature has.

    Returns 100 when feat_values is empty or has no (non-empty) entry for feat.
    '''
    value = feat_values.get(feat) if feat_values else None
    return len(value) if value else 100


def is_object(value):
    '''Is the feature value something other than a dict?

    True for Individuals, Variables, strings, and ints.
    '''
    return isinstance(value, (Individual, Variable, str, int))


##############################################################
## Generating random episodes for particular "generalizations"
##############################################################

def select(val):
    '''Given a value specification, select a value.

    A false specification gives False; an int or string is returned as is;
    a sequence starting with an int is treated as a range from which a random
    int is drawn (random.randrange(val[0], val[1])); any other sequence has
    one of its elements picked at random.
    '''
    if not val:
        return False
    if isinstance(val, (int, str)):
        return val
    if isinstance(val[0], int):
        # Random int in range
        return random.randrange(val[0], val[1])
    return random.choice(val)


def select_feat_val(dct, feat):
    '''Select a value for the feature in the dictionary.'''
    return select(dct.get(feat, False))


def get_var_val(episode, path, feature, feature_values=None):
    '''If there's already a value for the feature path, use that; otherwise,
    select for the feature.'''
    if feature_values is None:
        feature_values = {}
    return get_path_val(episode, path) or select_feat_val(feature_values, feature)


def is_var_path(x):
    '''Is x a variable path, that is, a tuple whose first element is VAR?'''
    return isinstance(x, tuple) and len(x) > 0 and x[0] == VAR


def get_path_val(episode, path):
    '''Look for a value in episode for a path of features.

    Returns the value reached by following the features in path one after
    another, or False as soon as a feature is missing, has a false value, or
    the value reached is not a dict. An empty path returns episode itself.
    '''
    current = episode
    for feat in path:
        if not isinstance(current, dict):
            # More features left but nothing to look them up in.
            return False
        # Get the value for the next feature in the path.
        current = current.get(feat, False)
        if not current:
            # Nothing found; stop here.
            return False
    return current


def gen_from_template(template, episode=True, feature_values=None):
    '''Generate an episode or dict from a template, a list of features and values.

    template: sequence of (feature, value specification) pairs.
    episode: if true, build an Episode (which is added to EPISODES); otherwise
        build a plain dict.
    feature_values: dict mapping features to their possible values, used for
        '*' specifications and for variable paths with no value yet.
    '''
    if feature_values is None:
        feature_values = {}
    ep = Episode() if episode else dict()
    for f, v in template:
        if v == '*':
            # Pick any value from the feature-value list
            val = select_feat_val(feature_values, f)
        elif is_var_path(v):
            # Generate variable value
            val = get_var_val(ep, v[1:], f, feature_values)
        elif isinstance(v, list):
            # Recursively pick values from those provided
            # NOTE(review): feature_values is not forwarded here, so '*'
            # entries inside a nested template select nothing -- confirm
            # whether that is intended.
            val = gen_from_template(v, False)
        else:
            # Select a value from what's provided
            val = select(v)
        # If some True value has been selected, use it for feature f
        if val:
            ep[f] = val
    return ep


def gen_from_templates(templates, n, features):
    '''Generate n episodes for each of the templates.

    The episodes are added to EPISODES as they are created and are also
    returned as a list, in generation order.
    '''
    generated = []
    for t in templates:
        for _ in range(n):
            generated.append(gen_from_template(t, feature_values=features))
    return generated