Revision: 52975
Updated Code
at November 12, 2011 07:29 by Cano0617
Updated Code
# No imports are allowed without the permission of your instructor.
# Definition: An "association list" is a list of lists, such as
# [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.
# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
# [["madonna", ["img3541.jpg", "img1234.jpg"]],
# ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]
# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
# :mtv: and not :madonna:
def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str
    description. image (a str) is the name of the image to be associated with
    the keys in description.

    description is lowercased and split on whitespace; every resulting token
    becomes (or reuses) a key of ial and image is added to that key's list.
    ial is modified in place: existing entries keep their position, new keys
    are appended at the end, and an image is never listed twice under the
    same key. Nothing is returned.
    '''
    # Note: all tokens in the ial are stored lowercase.
    for token in description.lower().split():
        for entry in ial:
            if entry[0] == token:
                images = entry[1]
                break
        else:
            # The loop finished without finding the key: start a new entry.
            images = []
            ial.append([token, images])
        if image not in images:
            images.append(image)
def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the
    text in webpage (a str). int list_threshold is the maximum length of any
    list in ial.

    Every <img ...> tag in webpage is examined on its own, so the src and alt
    values always come from the same tag. The words of the alt text become
    keys and the src value (case preserved, as file names may be case
    sensitive) is recorded under each of them. Tags without a src value are
    skipped. Afterwards any key whose image list is longer than
    list_threshold is dropped. ial is modified in place; nothing is returned.
    '''
    position = webpage.find('<')
    while position != -1:
        if webpage[position:position + 4].lower() == '<img':
            close = webpage.find('>', position)
            if close == -1:
                # Unterminated tag: treat the rest of the page as the tag.
                close = len(webpage)
            tag = webpage[position:close + 1]
            src = find_attribute_value(tag, 'src')
            if src:
                alt = find_attribute_value(tag, 'alt')
                record_associations(alt or '', src, ial)
        position = webpage.find('<', position + 1)
    # Slice assignment so the caller's list object sees the clean-up;
    # rebinding the local name would silently discard it.
    ial[:] = clean_up(ial, list_threshold)
def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.

    The attribute name is matched case-insensitively and only where it starts
    a word and is followed by '=', so 'alt' does not match inside 'salt' or
    inside another attribute's quoted value. A trailing '=' on att (such as
    'src=') is tolerated. Quoted values are returned without their quotes, an
    unquoted value runs up to the next whitespace or '>', and a value whose
    opening quote is never closed yields the empty string.
    '''
    name = att.strip().rstrip('=').strip().lower()
    if not name:
        return None
    length = len(html_tag)
    pos = 0
    while pos < length:
        char = html_tag[pos]
        if char == '"' or char == "'":
            # Skip another attribute's quoted value wholesale so that text
            # inside it can never be mistaken for an attribute name.
            closing = html_tag.find(char, pos + 1)
            if closing == -1:
                return None
            pos = closing + 1
            continue
        name_end = pos + len(name)
        starts_word = pos == 0 or html_tag[pos - 1].isspace()
        if starts_word and html_tag[pos:name_end].lower() == name:
            cursor = name_end
            while cursor < length and html_tag[cursor].isspace():
                cursor += 1
            if cursor < length and html_tag[cursor] == '=':
                cursor += 1
                while cursor < length and html_tag[cursor].isspace():
                    cursor += 1
                if cursor < length and html_tag[cursor] in ('"', "'"):
                    closing = html_tag.find(html_tag[cursor], cursor + 1)
                    if closing == -1:
                        return ''
                    return html_tag[cursor + 1:closing]
                end = cursor
                while (end < length and not html_tag[end].isspace()
                       and html_tag[end] != '>'):
                    end += 1
                return html_tag[cursor:end]
        pos += 1
    return None
def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass
    the description filter (a str). ial is the related image association list.
    Matching is done in a case insensitive manner.

    Each ':key:' term in filter requires an image to be listed under that key
    in ial; a term preceded by the word 'not' requires the image NOT to be
    listed under it. All terms must hold. A key that is absent from ial has
    no images, so it rejects everything as a plain term and nothing as a
    'not' term. The result keeps the order of images, which is not modified.
    '''
    # Map each lowercased key to the set of images recorded under it.
    tagged = {}
    for entry in ial:
        tagged.setdefault(entry[0].lower(), set()).update(entry[1])

    required = []
    excluded = []
    text = filter.lower()
    cursor = 0
    while True:
        opening = text.find(':', cursor)
        if opening == -1:
            break
        closing = text.find(':', opening + 1)
        if closing == -1:
            break
        key = text[opening + 1:closing].strip()
        # Only the word immediately before the term can negate it.
        words_before = text[cursor:opening].split()
        if words_before and words_before[-1] == 'not':
            excluded.append(tagged.get(key, set()))
        else:
            required.append(tagged.get(key, set()))
        cursor = closing + 1

    passing = []
    for image in images:
        wanted = all(image in group for group in required)
        banned = any(image in group for group in excluded)
        if wanted and not banned:
            passing.append(image)
    return passing
def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first met when reading ial from its
    first key to its last. ial itself is not modified.
    '''
    seen = set()
    images = []
    for entry in ial:
        for image in entry[1]:
            # Build a fresh list rather than deleting from one while looping
            # over it, which skips elements and leaves duplicates behind.
            if image not in seen:
                seen.add(image)
                images.append(image)
    return images
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.

    The outermost pair is the first and the last quote character of either
    kind in s; they match only when they are the same kind of quote and are
    two different characters of s.
    '''
    found = [index for index in (s.find("'"), s.find('"')) if index != -1]
    if not found:
        return ''
    opening = min(found)
    closing = max(s.rfind("'"), s.rfind('"'))
    if opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    if a == -1:
        # Covers "both are -1" too, because b is then -1 as well.
        return b
    if b == -1:
        return a
    # min also handles a == b, which must return that value rather than None.
    return min(a, b)
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.

    Entries whose value list has exactly list_threshold items are kept. The
    outer list is new, but the [key, values] entries are the same objects as
    in alist. alist itself is not modified.
    '''
    return [entry for entry in alist if len(entry[1]) <= list_threshold]
Revision: 52974
Updated Code
at November 11, 2011 02:49 by Cano0617
Updated Code
# No imports are allowed without the permission of your instructor.
# Definition: An "association list" is a list of lists, such as
# [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.
# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
# [["madonna", ["img3541.jpg", "img1234.jpg"]],
# ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]
# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
# :mtv: and not :madonna:
import urllib
def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str
    description. image (a str) is the name of the image to be associated with
    the keys in description.

    description is lowercased and split on whitespace; every resulting token
    becomes (or reuses) a key of ial and image is added to that key's list.
    ial is modified in place: existing entries keep their position, new keys
    are appended at the end, and an image is never listed twice under the
    same key. Nothing is returned.
    '''
    # Note: all tokens in the ial are stored lowercase.
    for token in description.lower().split():
        for entry in ial:
            if entry[0] == token:
                images = entry[1]
                break
        else:
            # The loop finished without finding the key: start a new entry.
            images = []
            ial.append([token, images])
        if image not in images:
            images.append(image)
def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the
    text in webpage (a str). int list_threshold is the maximum length of any
    list in ial.

    Every <img ...> tag in webpage is examined on its own, so the src and alt
    values always come from the same tag. The words of the alt text become
    keys and the src value (case preserved, as file names may be case
    sensitive) is recorded under each of them. Tags without a src value are
    skipped. Afterwards any key whose image list is longer than
    list_threshold is dropped. ial is modified in place; nothing is returned.
    '''
    position = webpage.find('<')
    while position != -1:
        if webpage[position:position + 4].lower() == '<img':
            close = webpage.find('>', position)
            if close == -1:
                # Unterminated tag: treat the rest of the page as the tag.
                close = len(webpage)
            tag = webpage[position:close + 1]
            src = find_attribute_value(tag, 'src')
            if src:
                alt = find_attribute_value(tag, 'alt')
                record_associations(alt or '', src, ial)
        position = webpage.find('<', position + 1)
    # Slice assignment so the caller's list object sees the clean-up;
    # rebinding the local name would silently discard it.
    ial[:] = clean_up(ial, list_threshold)
def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.

    The attribute name is matched case-insensitively and only where it starts
    a word and is followed by '=', so 'alt' does not match inside 'salt' or
    inside another attribute's quoted value. A trailing '=' on att (such as
    'src=') is tolerated. Quoted values are returned without their quotes, an
    unquoted value runs up to the next whitespace or '>', and a value whose
    opening quote is never closed yields the empty string.
    '''
    name = att.strip().rstrip('=').strip().lower()
    if not name:
        return None
    length = len(html_tag)
    pos = 0
    while pos < length:
        char = html_tag[pos]
        if char == '"' or char == "'":
            # Skip another attribute's quoted value wholesale so that text
            # inside it can never be mistaken for an attribute name.
            closing = html_tag.find(char, pos + 1)
            if closing == -1:
                return None
            pos = closing + 1
            continue
        name_end = pos + len(name)
        starts_word = pos == 0 or html_tag[pos - 1].isspace()
        if starts_word and html_tag[pos:name_end].lower() == name:
            cursor = name_end
            while cursor < length and html_tag[cursor].isspace():
                cursor += 1
            if cursor < length and html_tag[cursor] == '=':
                cursor += 1
                while cursor < length and html_tag[cursor].isspace():
                    cursor += 1
                if cursor < length and html_tag[cursor] in ('"', "'"):
                    closing = html_tag.find(html_tag[cursor], cursor + 1)
                    if closing == -1:
                        return ''
                    return html_tag[cursor + 1:closing]
                end = cursor
                while (end < length and not html_tag[end].isspace()
                       and html_tag[end] != '>'):
                    end += 1
                return html_tag[cursor:end]
        pos += 1
    return None
def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass
    the description filter (a str). ial is the related image association list.
    Matching is done in a case insensitive manner.

    Each ':key:' term in filter requires an image to be listed under that key
    in ial; a term preceded by the word 'not' requires the image NOT to be
    listed under it. All terms must hold. A key that is absent from ial has
    no images, so it rejects everything as a plain term and nothing as a
    'not' term. The result keeps the order of images, which is not modified.
    '''
    # Map each lowercased key to the set of images recorded under it.
    tagged = {}
    for entry in ial:
        tagged.setdefault(entry[0].lower(), set()).update(entry[1])

    required = []
    excluded = []
    text = filter.lower()
    cursor = 0
    while True:
        opening = text.find(':', cursor)
        if opening == -1:
            break
        closing = text.find(':', opening + 1)
        if closing == -1:
            break
        key = text[opening + 1:closing].strip()
        # Only the word immediately before the term can negate it.
        words_before = text[cursor:opening].split()
        if words_before and words_before[-1] == 'not':
            excluded.append(tagged.get(key, set()))
        else:
            required.append(tagged.get(key, set()))
        cursor = closing + 1

    passing = []
    for image in images:
        wanted = all(image in group for group in required)
        banned = any(image in group for group in excluded)
        if wanted and not banned:
            passing.append(image)
    return passing
def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first met when reading ial from its
    first key to its last. ial itself is not modified.
    '''
    seen = set()
    images = []
    for entry in ial:
        # Walk each entry's own image list; sizing every inner loop from the
        # first entry either drops images or runs past the end of the list.
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                images.append(image)
    return images
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.

    The outermost pair is the first and the last quote character of either
    kind in s; they match only when they are the same kind of quote and are
    two different characters of s.
    '''
    found = [index for index in (s.find("'"), s.find('"')) if index != -1]
    if not found:
        return ''
    opening = min(found)
    closing = max(s.rfind("'"), s.rfind('"'))
    if opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    if a == -1:
        # Covers "both are -1" too, because b is then -1 as well.
        return b
    if b == -1:
        return a
    # min also handles a == b, which must return that value rather than None.
    return min(a, b)
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.

    Entries whose value list has exactly list_threshold items are kept. The
    outer list is new, but the [key, values] entries are the same objects as
    in alist. alist itself is not modified.
    '''
    return [entry for entry in alist if len(entry[1]) <= list_threshold]
Revision: 52973
Updated Code
at November 8, 2011 14:17 by Cano0617
Updated Code
# No imports are allowed without the permission of your instructor.
# Definition: An "association list" is a list of lists, such as
# [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.
# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
# [["madonna", ["img3541.jpg", "img1234.jpg"]],
# ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]
# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
# :mtv: and not :madonna:
def process_filter_description(filter, images, ial):
'''Return a new list containing only items from list images that pass
the description filter (a str). ial is the related image association list.
Matching is done in a case insensitive manner.
'''
# NOTE(review): placeholder only - this revision contains no implementation,
# so the filtering described in the docstring above is not performed yet.
pass
def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first met when reading ial from its
    first key to its last. ial itself is not modified.
    '''
    seen = set()
    images = []
    for entry in ial:
        # Walk each entry's own image list; sizing every inner loop from the
        # first entry either drops images or runs past the end of the list.
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                images.append(image)
    return images
def record_associations(description, image, ial):
'''Update the image association list ial with the tokens from str
description. image (a str) is the name of the image to be associated with
the keys in description.
'''
# Note: Remember that all tokens in the ial should be lowercase.
# NOTE(review): placeholder only - this revision contains no implementation,
# so ial is left untouched.
pass
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.

    The outermost pair is the first and the last quote character of either
    kind in s; they match only when they are the same kind of quote and are
    two different characters of s. Any further quotes between that pair are
    simply part of the returned text.
    '''
    found = [index for index in (s.find("'"), s.find('"')) if index != -1]
    if not found:
        return ''
    opening = min(found)
    closing = max(s.rfind("'"), s.rfind('"'))
    if opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]
def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.

    The attribute name is matched case-insensitively and only where it starts
    a word and is followed by '=', so 'alt' does not match inside 'salt' or
    inside another attribute's quoted value. A trailing '=' on att (such as
    'src=') is tolerated. Quoted values are returned without their quotes, an
    unquoted value runs up to the next whitespace or '>', and a value whose
    opening quote is never closed yields the empty string.
    '''
    name = att.strip().rstrip('=').strip().lower()
    if not name:
        return None
    length = len(html_tag)
    pos = 0
    while pos < length:
        char = html_tag[pos]
        if char == '"' or char == "'":
            # Skip another attribute's quoted value wholesale so that text
            # inside it can never be mistaken for an attribute name.
            closing = html_tag.find(char, pos + 1)
            if closing == -1:
                return None
            pos = closing + 1
            continue
        name_end = pos + len(name)
        starts_word = pos == 0 or html_tag[pos - 1].isspace()
        if starts_word and html_tag[pos:name_end].lower() == name:
            cursor = name_end
            while cursor < length and html_tag[cursor].isspace():
                cursor += 1
            if cursor < length and html_tag[cursor] == '=':
                cursor += 1
                while cursor < length and html_tag[cursor].isspace():
                    cursor += 1
                if cursor < length and html_tag[cursor] in ('"', "'"):
                    closing = html_tag.find(html_tag[cursor], cursor + 1)
                    if closing == -1:
                        return ''
                    return html_tag[cursor + 1:closing]
                end = cursor
                while (end < length and not html_tag[end].isspace()
                       and html_tag[end] != '>'):
                    end += 1
                return html_tag[cursor:end]
        pos += 1
    return None
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    if a == -1:
        # Covers "both are -1" too, because b is then -1 as well.
        return b
    if b == -1:
        return a
    # min also handles a == b, which must return that value rather than None.
    return min(a, b)
def process_page(webpage, ial, list_threshold):
'''Update the image association list ial with the images found in the
text in webpage (a str). int list_threshold is the maximum length of any
list in ial.
'''
# NOTE(review): placeholder only - this revision contains no implementation,
# so webpage is not read and ial is left untouched.
pass
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.

    Entries whose value list has exactly list_threshold items are kept. The
    outer list is new, but the [key, values] entries are the same objects as
    in alist. alist itself is not modified.
    '''
    return [entry for entry in alist if len(entry[1]) <= list_threshold]
Revision: 52972
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 8, 2011 12:41 by Cano0617
Initial Code
# No imports are allowed without the permission of your instructor.
# Definition: An "association list" is a list of lists, such as
# [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.
# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
# [["madonna", ["img3541.jpg", "img1234.jpg"]],
# ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]
# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
# :mtv: and not :madonna:
def process_filter_description(filter, images, ial):
'''Return a new list containing only items from list images that pass
the description filter (a str). ial is the related image association list.
Matching is done in a case insensitive manner.
'''
# NOTE(review): placeholder only - the initial revision contains no
# implementation, so the filtering described above is not performed yet.
pass
def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first met when reading ial from its
    first key to its last. ial itself is not modified.
    '''
    seen = set()
    images = []
    for entry in ial:
        # Walk each entry's own image list; sizing every inner loop from the
        # first entry either drops images or runs past the end of the list.
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                images.append(image)
    return images
def record_associations(description, image, ial):
'''Update the image association list ial with the tokens from str
description. image (a str) is the name of the image to be associated with
the keys in description.
'''
# Note: Remember that all tokens in the ial should be lowercase.
# NOTE(review): placeholder only - the initial revision contains no
# implementation, so ial is left untouched.
pass
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.

    The outermost pair is the first and the last quote character of either
    kind in s; they match only when they are the same kind of quote and are
    two different characters of s.
    '''
    found = [index for index in (s.find("'"), s.find('"')) if index != -1]
    if not found:
        return ''
    opening = min(found)
    closing = max(s.rfind("'"), s.rfind('"'))
    if opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]
def find_attribute_value(html_tag, att):
'''Return the value of attribute att (a str) in the str html_tag.
Return None if att doesn't occur in html_tag.
'''
# NOTE(review): placeholder only - the initial revision contains no
# implementation, so html_tag is never inspected.
pass
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    if a == -1:
        # Covers "both are -1" too, because b is then -1 as well.
        return b
    if b == -1:
        return a
    # min also handles a == b, which must return that value rather than None.
    return min(a, b)
def process_page(webpage, ial, list_threshold):
'''Update the image association list ial with the images found in the
text in webpage (a str). int list_threshold is the maximum length of any
list in ial.
'''
# NOTE(review): placeholder only - the initial revision contains no
# implementation, so webpage is not read and ial is left untouched.
pass
def clean_up(alist, list_threshold):
'''Return a new association list that is a copy of alist except that any
key-value list in alist longer than list_threshold is not included.
'''
# NOTE(review): placeholder only - the initial revision contains no
# implementation, so no new list is built or returned.
pass
Initial URL
Initial Description
Initial Title
Com sci Assignment 2
Initial Tags
Initial Language
Python