Revision: 14650
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at June 8, 2009 17:46 by cthulhupunk0
Initial Code
"""
****************************************************
*Name : regexkeyclean.py
*Author : Jason Spadaro
*Date : Mon Jun 8 17:39:16 EDT 2009
*Description : Demonstrates using regular expressions to take data from a
* list, and incorporate it into a dictionary.
*
* Copyright 2009
****************************************************
"""
import re
class myRegexer:
    """Aggregate compiled regular expressions under caller-chosen keys."""

    def __init__(self, exps):
        """Compile every pattern in *exps* and store the result on the instance.

        Args:
            exps: mapping of key -> regular expression pattern string.

        After construction ``self.exps`` maps each of the original keys to
        the object returned by ``re.compile`` for its pattern.  The mapping
        passed in is not modified.
        """
        compiled = {}
        # items() exists on both Python 2 and Python 3 dictionaries, whereas
        # iteritems() is Python 2 only.
        for key, pattern in exps.items():
            compiled[key] = re.compile(pattern)
        self.exps = compiled
class dataItem:
    """An individual data object backed by a dictionary."""

    def __init__(self, dct):
        """Set up the dictionary instance data.

        Args:
            dct: mapping whose entries are copied into ``self.dct``.  The
                keys "name", "id" and "data" default to the empty string
                when *dct* does not supply them; any other keys in *dct*
                are kept as well.
        """
        self.dct = {"name": "", "id": "", "data": ""}
        self.dct.update(dct)

    def __str__(self):
        """Return the entries as ``key-->value`` pairs separated by commas.

        Values are formatted with ``%s`` so non-string values (for example
        integer ids) can be rendered too.
        """
        # Iterate over items(): iterating the dict itself yields only keys,
        # which cannot be unpacked into a (key, value) pair.
        # join() places separators only between entries, so no trailing
        # comma has to be sliced off afterwards.
        return ",".join(
            "%s-->%s" % (key, value) for key, value in self.dct.items()
        )
class dataDictionary:
    """Hold a list of raw records and convert it to a name-keyed dictionary.

    Each raw record is a dictionary whose keys are classified with the
    regular expressions stored under "name", "id" and "data".
    """

    def __init__(self, dataList, regexExps):
        """Set up the data list, the (empty) result dictionary and the regexes.

        Args:
            dataList: iterable of dictionaries (the raw records).
            regexExps: mapping of key -> pattern string, handed to
                ``myRegexer`` for compilation.  ``listToItems`` looks up
                the keys "name", "id" and "data" in it.
        """
        self.dataItems = {}
        self.dataList = dataList
        self.regexExps = regexExps
        self.myRegexer = myRegexer(self.regexExps)

    def listToItems(self):
        """Rebuild ``self.dataItems`` from ``self.dataList``.

        Every record becomes a dictionary with the normalized keys "name",
        "id" and/or "data" (only those for which a matching raw key was
        found).  The result is stored in ``self.dataItems`` under the value
        found for the record's name; records without a name-like key are
        stored under the empty string, so several such records overwrite
        each other.

        Patterns are applied with ``match`` in the fixed order name, id,
        data, and the first pattern that matches decides where the value
        goes.  This keeps a key such as "dataName" (matched by both the
        name pattern and a plain "data" pattern) from also overwriting the
        "data" slot, which previously made the result depend on dictionary
        iteration order.
        """
        patterns = self.myRegexer.exps
        self.dataItems = {}
        for record in self.dataList:
            normalized = {}
            name = ""
            # items() works on Python 2 and 3; iteritems() is Python 2 only.
            for key, value in record.items():
                if patterns["name"].match(key):
                    normalized["name"] = value
                    name = value
                elif patterns["id"].match(key):
                    normalized["id"] = value
                elif patterns["data"].match(key):
                    normalized["data"] = value
            self.dataItems[name] = normalized

    def __str__(self):
        """Return a user-readable rendering of ``self.dataItems``.

        Each entry is rendered as a ``____name____`` heading preceded by two
        newlines, followed by a newline, a tab and the string form of the
        entry's dictionary.  An empty ``dataItems`` yields an empty string.
        """
        # %s formatting (rather than "+") also copes with non-string names.
        return "".join(
            "\n\n____%s____\n\t%s" % (name, fields)
            for name, fields in self.dataItems.items()
        )
##########################################################################
if __name__ == "__main__":
    # Demonstration: normalize the inconsistently named keys of two records.
    # Every print call below takes exactly one argument so that the script
    # produces the same output under Python 2 and Python 3.

    # First we need our data...
    my_data = [{"dataName":"foo",
                "idNum":1,
                "data":1},
               {"dataname":"bar",
                "id_num":2,
                "data":"10"}]

    # Now, how are we going to designate keys? Like this.
    # NOTE: the patterns are applied with match(), which anchors only at the
    # start of a key, so the plain "data" pattern also matches keys such as
    # "dataName".
    my_regular_expressions = {"name":"(.*)(N|n)(A|a)(M|m)(E|e)(.*)",
                              "id":"(.*)(I|i)(D|d)(.*)",
                              "data":"data"}

    # Instantiate our data dictionary
    sample_data_dct = dataDictionary(my_data, my_regular_expressions)

    # Here's the list we've fed in
    print(sample_data_dct.dataList)
    print("")

    # Here's our dictionary. Notice that it's empty.
    print(sample_data_dct.dataItems)
    print("")
    print("#####################################")
    print("")

    # Here's the magic. Converting the list to our dictionary of "Items"
    sample_data_dct.listToItems()

    # The final output. Notice that (a) the name of each object has been
    # successfully extracted, and (b) each object's keys have been normalized
    # for the appropriate values.
    print(sample_data_dct)
######################OUTPUT#######################
"""
[{'dataName': 'foo', 'idNum': 1, 'data': 1}, {'dataname': 'bar', 'id_num': 2, 'data': '10'}]
{}
#####################################
____foo____
{'data': 1, 'name': 'foo', 'id': 1}
____bar____
{'data': '10', 'name': 'bar', 'id': 2}
"""
Initial URL
Initial Description
Initial Title
Regular Expression Key Clean
Initial Tags
Initial Language
Python