Posted By

tamuratetsuya on 02/19/09


Tagged

python


Versions (?)


Advertising

Submit Site


Who likes this?

1 person has marked this snippet as a favorite

cu39


人工無脳のソース アルゴリズムコンテスト勉強会にて


Published in: Python 



Website Promotion
DIRECTORY
is a crucial factor for all websites that need to gain better organic search engine rankings and increase website traffic.
Submitting your website to our SEO-friendly, high-traffic Business Directory for review as part of your Web Promotion strategy is an excellent way to gain a valuable backlink and increase your website's visibility online.

Submit Site


URL: http://www.team-lab.com/news/index.php?itemid=469

アルゴリズムコンテストの勉強会で紹介した、Google App Engineで動くPythonコードです。Djangoフレームワークを前提にしています。あまりコードとしては参考にならないかも知れませんが、これだけでも簡単な人工無脳ができるんですね。

Expand | Embed | Plain Text
  1. #!-*- coding:utf-8 -*-
  2. import sys
  3. import os
  4. import string
  5. import urllib
  6. import urllib2
  7. import math
  8. import htmlentitydefs
  9. import re
  10. from django.shortcuts import render_to_response
  11. from google.appengine.api import users
  12. from django.http import HttpResponseRedirect
  13. from BeautifulSoup import BeautifulSoup
  14. from google.appengine.api import urlfetch
  15. from base64 import b64encode
  16.  
  17.  
  18. #初期ページ
  19. def Mainpage(request):
  20. return render_to_response('input.html',)
  21.  
  22.  
  23. #質問をもらって回答を出すまでのメソッド
  24. def getAnswer(request):
  25. search_text = ''
  26. from_user = 'username'
  27. try:
  28. search_text=request.POST['query']
  29. search_text=search_text.encode('utf-8')
  30. except:pass
  31. if search_text == '':
  32. search_text,from_user = GetresponseTwitter()
  33. #print >>sys.stderr,'search_text=%s'%search_text
  34. search_text = search_text.encode('utf-8')
  35. to_ = ''
  36. from_ = ''
  37.  
  38. #質問文をそのまま検索にかける
  39. if search_text!='' :
  40. url = 'http://pcod.no-ip.org/yats/search?query=%s&rss' % (search_text)
  41. xml_search2=urlfetch.fetch(url).content
  42. entry = BeautifulSoup(xml_search2)('entry')
  43. words=[]
  44. titledict=dict()
  45. answer = ''
  46. for en in entry:
  47. #print >>sys.stderr,en('summary')[0]
  48. words=en('summary')[0](text = True)[0].split(' ')
  49. update_before = en('updated')[0](text = True)[0]
  50. #print >>sys.stderr,status_id
  51. try:
  52. regexp = re.compile('username')
  53. if regexp.search(en('summary')[0](text = True)[0])== None:#tam_botの発言は無視
  54. from_=words[0].split('@')[1]
  55. to_=words[2].split('@')[1]
  56.  
  57. url='http://pcod.no-ip.org/yats/search?query=user:'+to_+'%20%40'+from_+'%20&lang=ja&rss'
  58. #print >>sys.stderr,url
  59. xml_search=urlfetch.fetch(url).content
  60. title1to2 = BeautifulSoup(xml_search)('entry')
  61. for x in title1to2:
  62. #titledict[x('updated')[0](text = True)[0]]=x('summary')[0].contents[0]
  63. update_after=x('updated')[0](text = True)[0]
  64. if update_after > update_before:
  65. #print >>sys.stderr,'update_before=%s update_after=%s'%(update_before,update_after)
  66. tmp_answer=x('summary')[0](text = True)[0].split(' ')[3]
  67. regexp1 = re.compile('@')
  68. if regexp1.search(tmp_answer)==None and tmp_answer!='':
  69. print >>sys.stderr,'answer=%s'%tmp_answer
  70. answer = tmp_answer
  71. break#一個でもみつかればbreak
  72. if tmp_answer != '':
  73. break
  74. except:pass
  75. #print >>sys.stderr,'answer=%s'%answer
  76.  
  77.  
  78. if answer != '':#検索結果があった場合
  79. post = '@%s %s'%(from_user,answer)
  80. else:#検索結果がなかった場合
  81. #############文章だけで回答が出せない場合#############
  82. question_word=GetKeitaiso_mecapi(search_text).encode('utf-8')
  83. q = {'query':question_word,'lang' : 'ja'}
  84. url = 'http://pcod.no-ip.org/yats/search?rss&' + urllib.urlencode(q)
  85. xml=urlfetch.fetch(url).content
  86. #print >>sys.stderr,xml
  87. entry = BeautifulSoup(xml)('entry')
  88. for en in entry:
  89. tmp_answer=en('summary')[0](text = True)[0].split(' : ')[1].rstrip()
  90. regexp = re.compile('tam_bot')
  91. if regexp.search(en('summary')[0](text = True)[0])== None:#tam_botの発言は無視
  92. regexp1 = re.compile('@')
  93. if regexp1.search(tmp_answer)==None and tmp_answer!='' and search_text.rstrip()!=tmp_answer.rstrip().encode('utf-8'):
  94. answer = tmp_answer
  95. #print >>sys.stderr,'tmp_answer=%s'%tmp_answer
  96. break
  97. post = '@%s %s'%(from_user,answer)
  98.  
  99. #########最後までわからない場合##############
  100. if answer == '':
  101. post = u'@%s %s'%(from_user,unicode(search_text,'utf-8'))
  102.  
  103. #print >>sys.stderr,'post=%s'%post
  104. Twitter(post)
  105. return render_to_response('outanswer.html',{'search_text':search_text,'post':post})
  106.  
  107.  
  108. #質問文章を形態素解析かけて単語を取り出す
  109. def GetKeitaiso_mecapi(question):
  110. question_word=''
  111. word_rank=dict()
  112. i=0
  113. q={'format':'xml','sentence':question,'response':'','filter':'uniq'}
  114. url='http://mimitako.net/api/mecapi.cgi?'+ urllib.urlencode(q)
  115. xml_question=urlfetch.fetch(url).content
  116. #print >>sys.stderr,BeautifulSoup(xml_question)('mecabresult')
  117. ngword=[u'何',u'誰']
  118. for mlist in BeautifulSoup(xml_question)('mecabresult'):
  119. for m in mlist('word'):
  120. term=m('feature')[0].contents[0].split(',')
  121. word=m('surface')[0]
  122. word=tr(u'!"#$%&()=-^~\|[]{}@:`*;+,.<>/\?_。、・', u' ',word)
  123. word=string.replace(word, ' ', '')
  124. #print >>sys.stderr,'%s %s %s %s %s %s %s'%(term[0],term[1],term[2],term[3],term[4],term[5],word)
  125. if (term[0]==u'名詞' or term[0]==u'動詞' or term[0]==u'形容詞') and word != '' and word not in ngword:
  126. cntwd = int(countWord(word))
  127. #print >>sys.stderr,'%s %s %s'%(term[0],word,cntwd)
  128. if i<3 and cntwd<100000000:
  129. question_word = u'%s %s'%(question_word,word)
  130. print >>sys.stderr,'%s %s %s'%(term[0],word,cntwd)
  131. i=i+1
  132. return question_word
  133.  
  134. #Twitterへのpostメソッド
  135. def Twitter(post):
  136. #basic auth
  137. username='username'
  138. password='password'
  139. url = 'http://twitter.com/statuses/update.json'
  140. payload = urllib.urlencode({"status": post.encode("utf-8")})
  141. base64string =b64encode("%s:%s" % (username, password))
  142. headers = {"Authorization": "Basic %s" % base64string}
  143. json_result = urlfetch.fetch(url, payload=payload, method=urlfetch.POST, headers=headers).content
  144. return 1
  145.  
  146.  
  147. #Twitterからあるユーザの直近の質問をとってくる
  148. def GetresponseTwitter():
  149. #basic auth
  150. username='username'
  151. password='password'
  152.  
  153. url = 'http://twitter.com/statuses/replies.xml'
  154. base64string =b64encode("%s:%s" % (username, password))
  155. headers = {"Authorization": "Basic %s" % base64string}
  156. xml_result=urlfetch.fetch(url, payload='', method=urlfetch.POST, headers=headers).content
  157. result_comment=htmlentity2unicode(BeautifulSoup(xml_result)('text')[0].contents[0].split(' ')[1])
  158. from_user=BeautifulSoup(xml_result)('screen_name')[0].contents[0]
  159. return result_comment,from_user
  160.  
  161.  
  162. #googleの検索結果数を見る
  163. def countWord(word):
  164. q = {#'q':'\"'+question+'\"',
  165. 'q':word.encode('utf-8'),
  166. 'v' : 1.0,
  167. 'hl' : 'ja',
  168. 'rsz': 'small'
  169. }
  170. url=u'http://ajax.googleapis.com/ajax/services/search/web?'+ urllib.urlencode(q)
  171. json_result=urlfetch.fetch(url).content
  172. json_result = string.replace(json_result, 'null', '"null"')
  173. try:
  174. dict_result=eval('dict('+json_result+')')
  175. return dict_result['responseData']['cursor']['estimatedResultCount']
  176. except:
  177. return 0
  178.  
  179.  
  180. def tr(pattern, repl, string):
  181. m = dict(zip(pattern, repl))
  182. return ''.join((m.get(c,c) for c in string))
  183.  
  184.  
  185.  
  186. #数値文字参照→文字へ
  187. def htmlentity2unicode(text):
  188. # 正規表現のコンパイル
  189. reference_regex = re.compile(u'&(#x?[0-9a-f]+|[a-z]+);', re.IGNORECASE)
  190. num16_regex = re.compile(u'#x\d+', re.IGNORECASE)
  191. num10_regex = re.compile(u'#\d+', re.IGNORECASE)
  192.  
  193. result = u''
  194. i = 0
  195. while True:
  196. # 実体参照 or 文字参照を見つける
  197. match = reference_regex.search(text, i)
  198. if match is None:
  199. result += text[i:]
  200. break
  201.  
  202. result += text[i:match.start()]
  203. i = match.end()
  204. name = match.group(1)
  205.  
  206. # 実体参照
  207. if name in htmlentitydefs.name2codepoint.keys():
  208. result += unichr(htmlentitydefs.name2codepoint[name])
  209. # 文字参照
  210. elif num16_regex.match(name):
  211. # 16進数
  212. result += unichr(int(u'0'+name[1:], 16))
  213. elif num10_regex.match(name):
  214. # 10進数
  215. result += unichr(int(name[1:]))
  216.  
  217. return result

Report this snippet 

You need to login to post a comment.

Download royalty free graphics