Revision: 14650
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at June 8, 2009 17:46 by cthulhupunk0
Initial Code
""" **************************************************** *Name : regexkeyclean.py *Author : Jason Spadaro *Date : Mon Jun 8 17:39:16 EDT 2009 *Description : Demonstrates using regular expressions to take data from a * list, and incorporate it into a dictionary. * * Copyright 2009 **************************************************** """ import re class myRegexer: """ **************************************************** *All this does is describe an aggregator of regular expression objects **************************************************** """ def __init__(self, exps): """ **************************************************** *Takes a dictionary of strings representing regular expressions, *uses that dictionary to create a dictionary of regular expression *objects, and places them in in the instance data. **************************************************** """ self.exps = exps tempDct = {} for k, v in self.exps.iteritems(): tempDct.update({k: re.compile(v)}) self.exps = tempDct class dataItem: """ **************************************************** *An individual data object **************************************************** """ def __init__(self, dct): """ **************************************************** *Sets up the dictionary instance data **************************************************** """ self.dct = {"name":"", "id":"", "data":""} self.dct.update(dct) def __str__(self): """ **************************************************** *Convient formating if you need to "see" what this is **************************************************** """ myStr = "" for k, v in self.dct: myStr = myStr + k + "-->" + v + "," myStr = myStr[0,-1] return myStr class dataDictionary: """ **************************************************** *Aggregates dataItems, with a method to change the list to a dictionary *formatteed according to regular expressions. **************************************************** """ def __init__(self, dataList, regexExps): """ **************************************************** *Sets up the data list, data dictionary, and the regular *expressions to be used. **************************************************** """ self.dataItems = {} self.dataList = dataList self.regexExps = regexExps self.myRegexer = myRegexer(self.regexExps) def listToItems(self): """ **************************************************** *Converts the list of items to a dictionary of items. It keys *the item based on a matches to the "name" regular expression. *The value associated with that key is a dictionary where the *all the data is keyed similarly using regular expressions. **************************************************** """ self.dataItems = {} for i in self.dataList: tempDct = {} name = "" for k, v in i.iteritems(): """ **************************************************** *Designates the regular expression to associate *with each key. **************************************************** """ if self.myRegexer.exps["name"].match(k): tempDct.update({"name":v}) name = tempDct["name"] if self.myRegexer.exps["id"].match(k): tempDct.update({"id":v}) if self.myRegexer.exps["data"].match(k): tempDct.update({"data":v}) self.dataItems.update({name:tempDct}) def __str__(self): """ **************************************************** *User readable version of the data **************************************************** """ myStr = "" for k, v in self.dataItems.iteritems(): myStr = myStr + "\n\n____" + k + "____\n" myStr = myStr + "\t" + v.__str__() return myStr ########################################################################## if __name__ == "__main__": #First we need our data... my_data = [{"dataName":"foo", "idNum":1, "data":1}, {"dataname":"bar", "id_num":2, "data":"10"}] #Now, how are we going to designate keys? Like this: my_regular_expressions = {"name":"(.*)(N|n)(A|a)(M|m)(E|e)(.*)", "id":"(.*)(I|i)(D|d)(.*)", "data":"data"} #Instantiate our data dictionary sample_data_dct = dataDictionary(my_data, my_regular_expressions) #Here's the list we've fed in print sample_data_dct.dataList print #Here's our dictionary. Notice that it's empty. print sample_data_dct.dataItems print print "#####################################" print #Here's the magic. Converting the list to our dictionary of "Items" sample_data_dct.listToItems() #The final output. Notice that (a) the name of each object has been #successfully extracted, and (b) each object's keys have been normalized #for the appropriate values. print sample_data_dct ######################OUTPUT####################### """ [{'dataName': 'foo', 'idNum': 1, 'data': 1}, {'dataname': 'bar', 'id_num': 2, 'data': '10'}] {} ##################################### ____foo____ {'data': 1, 'name': 'foo', 'id': 1} ____bar____ {'data': '10', 'name': 'bar', 'id': 2} """
Initial URL
Initial Description
Initial Title
Regular Expression Key Clean
Initial Tags
Initial Language
Python