Posted By

taboularasa on 03/07/09


Tagged

statistics word sip


Versions (?)

word frequency


 / Published in: Python
 

URL: http://www.daniweb.com/code/snippet374.html

  1. # word frequency in a text
  2. # tested with Python24 vegaseat 25aug2005
  3.  
  4. # Chinese wisdom ...
  5. str1 = """Man who run in front of car, get tired.
  6. Man who run behind car, get exhausted."""
  7. print "Original string:"
  8. print str1
  9.  
  10. print
  11.  
  12. # create a list of words separated at whitespaces
  13. wordList1 = str1.split(None)
  14.  
  15. # strip any punctuation marks and build modified word list
  16. # start with an empty list
  17. wordList2 = []
  18. for word1 in wordList1:
  19. # last character of each word
  20. lastchar = word1[-1:]
  21. # use a list of punctuation marks
  22. if lastchar in [",", ".", "!", "?", ";"]:
  23. word2 = word1.rstrip(lastchar)
  24. else:
  25. word2 = word1
  26. # build a wordList of lower case modified words
  27. wordList2.append(word2.lower())
  28.  
  29. print "Word list created from modified string:"
  30. print wordList2
  31.  
  32. print
  33.  
  34. # create a wordfrequency dictionary
  35. # start with an empty dictionary
  36. freqD2 = {}
  37. for word2 in wordList2:
  38. freqD2[word2] = freqD2.get(word2, 0) + 1
  39.  
  40. # create a list of keys and sort the list
  41. # all words are lower case already
  42. keyList = freqD2.keys()
  43. keyList.sort()
  44.  
  45. print "Frequency of each word in the word list (sorted):"
  46. for key2 in keyList:
  47. print "%-10s %d" % (key2, freqD2[key2])

Report this snippet  

You need to log in to post a comment.