Posted By

Cano0617 on 11/08/11


Tagged


Versions (?)

Com sci Assignment 2


 / Published in: Python
 

  1. # No imports are allowed without the permission of your instructor.
  2.  
  3. # Definition: An "association list" is a list of lists, such as
  4. # [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
  5. # Each sublist has two elements: the first is called a "key" and is a value of
  6. # any type, and the second is a list of values (of any type) that are
  7. # associated with that key. No key occurs more than once in an association
  8. # list.
  9.  
  10. # An "image association list" is an association list in which each key is a
  11. # string and its associated list contains the names of image files that
  12. # have that string in their img tag.
  13. # Example:
  14. # [["madonna", ["img3541.jpg", "img1234.jpg"]],
  15. # ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]
  16.  
  17. # Definition: A "filter description" is a string made up of one or more terms
  18. # separated by "and". Each term is a sequence of characters surrounded by
  19. # colons, with optionally the word "not" before it. Example:
  20. # :mtv: and not :madonna:
  21.  
  22.  
  23. def record_associations(description, image, ial):
  24. '''Update the image association list ial with the tokens from str
  25. description. image (a str) is the name of the image to be associated with
  26. the keys in description.
  27. '''
  28. # Note: Remember that all tokens in the ial should be lowercase.
  29. # Passed as lower case? or made lower case?
  30. temp= description.lower().split()
  31.  
  32. for item in temp:
  33. ial.append([item,[]])
  34. #appends key with empty list ready to store images
  35. #code below is for removing duplicates
  36. current=None
  37. last=None
  38. ial.sort()
  39. #smaller lists such as new created duplicates are move ahead of the original which contains
  40. for x in range(len(ial)-1):
  41. current=ial[x][0]
  42. last=ial[x-1][0]
  43. if current==last and len(ial)>1 and x!=0:
  44. ial.pop(x-1)
  45. else:
  46. ial.pop(x)
  47.  
  48. for item in temp:
  49. for descript in ial:
  50. if (item in descript)==True:
  51. descript[1].append(image)
  52.  
  53.  
  54. def process_page(webpage, ial, list_threshold):
  55. '''Update the image association list ial with the images found in the
  56. text in webpage (a str). int list_threshold is the maximum length of any
  57. list in ial.
  58. '''
  59. #uses record associations
  60. # find_attribute_value(webpage,'src')
  61. 'ask for help in creating a list structured like the ial list '
  62. src_temp=[]
  63. src= find_attribute_value(webpage, 'src=')#image name
  64. alt_temp=[]
  65. alt=find_attribute_value(webpage, 'alt=')#description
  66. src_loc=0
  67. alt_loc=0
  68.  
  69. while((src!=None and alt!=None)):
  70. src_temp.append(src.lower())
  71. src_loc = webpage.find(src)#location of image
  72. src = find_attribute_value(webpage[src_loc:],'src')#searches past last image
  73. alt_temp.append(alt.lower())
  74. alt_loc=webpage.find(alt)
  75. alt= find_attribute_value(webpage[alt_loc:],'alt')
  76.  
  77. for x in range(len(alt_temp)):
  78. record_associations(alt_temp[x],src_temp[x],ial)
  79.  
  80. ial=clean_up(ial, list_threshold)
  81.  
  82. #pass this on to process filter description
  83.  
  84.  
  85.  
  86.  
  87. def find_attribute_value(html_tag, att):
  88. '''Return the value of attribute att (a str) in the str html_tag.
  89. Return None if att doesn't occur in html_tag.
  90. '''
  91.  
  92. if not(att in html_tag):
  93. return None
  94. else:
  95. start_index = html_tag.find(att)
  96. end_index= html_tag.find('=',start_index+len(att)+1)
  97. return find_guts(html_tag[start_index:end_index])
  98.  
  99. # for char in html_tag:
  100.  
  101.  
  102.  
  103. def process_filter_description(filter, images, ial):
  104. '''Return a new list containing only items from list images that pass
  105. the description filter (a str). ial is the related image association list.
  106. Matching is done in a case insensitive manner.
  107. '''
  108. restrict=filter.split()
  109. temp=[]
  110. for item in restrict:
  111. if item.count(":")>1:
  112. key=item.split(":")
  113. key=item.strip(':')
  114. temp.append(key.lower())
  115. else:
  116. temp.append(item.lower())
  117. filter=temp
  118. master_set=set(images)
  119. working_set= master_set
  120. all_key=[]
  121. all_set=[]
  122. intersect_only=None
  123. # if True then find intersection
  124. # if False then find intersection of not
  125. has_set=False
  126. for item in ial:
  127. all_set.append(set(item[1]))
  128. all_key.append(item[0])
  129.  
  130.  
  131. for int in range(len(temp)):
  132. if temp[int]=='and' or temp[int]=='not':
  133. has_set=False
  134. elif temp[int-1]=='not':
  135. intersect_only=False
  136. has_set=True
  137. elif all_key.index(temp[int]) :
  138. list.i
  139. elif int==0:
  140. intersect_only=True
  141. has_set=True
  142. elif temp[int-1]=='and':
  143. intersect_only=True
  144. has_set=True
  145. else:
  146. intersect_only=None
  147. has_set=False
  148.  
  149. if intersect_only==False and has_set==True:
  150. # find current key in a list of only keys
  151. find_key=all_key.index(temp[int])
  152. # find set that matches the key
  153. find_set=all_set[find_key]
  154. working_set=working_set.difference(find_set)
  155.  
  156. elif intersect_only==True and has_set==True:
  157. #same idea as above
  158. find_key=all_key.index(temp[int])
  159. find_set=all_set[find_key]
  160. working_set=working_set.intersection(find_set)
  161.  
  162.  
  163. return list(working_set)
  164. #
  165. # index=filter.find(temp[int])
  166. # remove=filter.find('not',0,index)!=-1
  167. # if remove==True:
  168. # pass
  169.  
  170.  
  171. def all_images(ial):
  172. '''Return a list of all the images in image association list ial.
  173. Duplicates are excluded.
  174. '''
  175. x = 0
  176. temp = []
  177. for bob in range(len(ial)):
  178. for int in range(len(ial[bob][1])):
  179. temp.append(ial[bob][1][int])
  180.  
  181. #removes duplicates
  182. for item in temp :
  183. if temp.count(item) > 1 :
  184. temp.remove(item)
  185.  
  186. return temp
  187.  
  188.  
  189.  
  190.  
  191.  
  192.  
  193. def find_guts(s):
  194. '''Return the characters in str s contained within the outermost pair of
  195. matching single or double quotes. If there are no quotes or the outermost
  196. quotes don't match, the empty string is returned.
  197. '''
  198. firstsq = s.find("'")
  199. #location of first single quote
  200. lastsq = s.rfind("'")
  201. #location of last single quote
  202. firstdq = s.find('"')
  203. #location of first double quote
  204. lastdq = s.rfind('"')
  205. #location of last double quote
  206. single = False
  207. #tracks whether or not there is a complete single quote ('abc')
  208. double = False
  209. #tracks whether or not there is a complete double quote ("abc")
  210.  
  211. if firstsq == lastsq :
  212. #if they're the same then it's either an incomplete quote or no quotes at all
  213. single = False
  214. elif firstsq != lastsq :
  215. single = True
  216.  
  217. if firstdq == lastdq :
  218. #same idea as the the single quote check
  219. double = False
  220. elif firstdq != lastdq :
  221. double = True
  222.  
  223. if double == single:
  224. #if both single and double quotes are present
  225. if firstsq < firstdq:
  226. #then find which quote occurs first
  227. #in this case single quotes
  228. return s[firstsq + 1:lastsq]
  229. else:
  230. #other case is to return by double quote
  231. return s[firstdq + 1:lastdq]
  232.  
  233.  
  234. elif single == True:
  235. #if only single is true then return single
  236. return s[firstsq + 1:lastsq]
  237. elif double == True:
  238. #if only double is true then return double
  239. return s[firstdq + 1:lastdq]
  240.  
  241.  
  242.  
  243.  
  244.  
  245.  
  246. def first(a, b):
  247. '''Return the smaller of the two ints a and b, excluding -1. Both a and b
  248. are >= -1. If exactly one is -1, return the other. If both are -1, return
  249. -1.
  250. '''
  251.  
  252. if a == -1 & b == -1 :
  253. return -1
  254. if a == -1:
  255. return b
  256. if b == -1 :
  257. return a
  258. if a < b :
  259. return a
  260. if b < a :
  261. return b
  262.  
  263.  
  264.  
  265.  
  266.  
  267. def clean_up(alist, list_threshold):
  268. '''Return a new association list that is a copy of alist except that any
  269. key-value list in alist longer than list_threshold is not included.
  270. '''
  271. temp = []
  272. for x in range(len(alist)):
  273. if len(alist[x][1]) <= list_threshold :
  274. temp.append(alist[x])
  275. return temp

Report this snippet  

You need to login to post a comment.