Chatterbot source code, from an algorithm contest study session


/ Published in: Python

Python code that runs on Google App Engine, presented at an algorithm contest study session.
It assumes the Django framework.
The code may not be much of a model to follow, but it shows that even this little is enough to build a simple chatterbot.
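The two view functions in the listing would be wired up through an ordinary Django URLconf, which the snippet does not include; the following is a minimal hypothetical sketch (the module path bot.views and the URL patterns are made up for illustration):

    # urls.py (hypothetical; adjust the module path to your project)
    from django.conf.urls.defaults import patterns

    urlpatterns = patterns('bot.views',
        (r'^$',       'Mainpage'),   # shows input.html
        (r'^answer$', 'getAnswer'),  # handles the submitted question
    )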


# -*- coding: utf-8 -*-
import sys
import os
import string
import urllib
import urllib2
import math
import htmlentitydefs
import re
from django.shortcuts import render_to_response
from google.appengine.api import users
from django.http import HttpResponseRedirect
from BeautifulSoup import BeautifulSoup
from google.appengine.api import urlfetch
from base64 import b64encode


# Initial page
def Mainpage(request):
    return render_to_response('input.html')


# From receiving a question to producing an answer
def getAnswer(request):
    search_text = ''
    from_user = 'username'
    try:
        search_text = request.POST['query']
        search_text = search_text.encode('utf-8')
    except:
        pass
    if search_text == '':
        # no form input, so pick up the latest question from Twitter instead
        search_text, from_user = GetresponseTwitter()
        #print >>sys.stderr, 'search_text=%s' % search_text
        search_text = search_text.encode('utf-8')
    to_ = ''
    from_ = ''

    # Run the question text through the search as-is
    if search_text != '':
        url = 'http://pcod.no-ip.org/yats/search?query=%s&rss' % (search_text)
        xml_search2 = urlfetch.fetch(url).content
        entry = BeautifulSoup(xml_search2)('entry')
        words = []
        titledict = dict()
        answer = ''
        for en in entry:
            #print >>sys.stderr, en('summary')[0]
            words = en('summary')[0](text=True)[0].split(' ')
            update_before = en('updated')[0](text=True)[0]
            try:
                regexp = re.compile('username')
                if regexp.search(en('summary')[0](text=True)[0]) is None:  # ignore tam_bot's own posts
                    from_ = words[0].split('@')[1]
                    to_ = words[2].split('@')[1]

                    # look for what to_ said back to from_ after that status
                    url = 'http://pcod.no-ip.org/yats/search?query=user:' + to_ + '%20%40' + from_ + '%20&lang=ja&rss'
                    #print >>sys.stderr, url
                    xml_search = urlfetch.fetch(url).content
                    title1to2 = BeautifulSoup(xml_search)('entry')
                    for x in title1to2:
                        #titledict[x('updated')[0](text=True)[0]] = x('summary')[0].contents[0]
                        update_after = x('updated')[0](text=True)[0]
                        if update_after > update_before:
                            #print >>sys.stderr, 'update_before=%s update_after=%s' % (update_before, update_after)
                            tmp_answer = x('summary')[0](text=True)[0].split(' ')[3]
                            regexp1 = re.compile('@')
                            if regexp1.search(tmp_answer) is None and tmp_answer != '':
                                print >>sys.stderr, 'answer=%s' % tmp_answer
                                answer = tmp_answer
                                break  # stop as soon as one is found
                    # tmp_answer may be unbound here; the bare except below swallows the NameError
                    if tmp_answer != '':
                        break
            except:
                pass
        #print >>sys.stderr, 'answer=%s' % answer

        if answer != '':  # the search produced a reply
            post = '@%s %s' % (from_user, answer)
        else:  # no usable search hit
            ############# when the sentence alone yields no answer #############
            question_word = GetKeitaiso_mecapi(search_text).encode('utf-8')
            q = {'query': question_word, 'lang': 'ja'}
            url = 'http://pcod.no-ip.org/yats/search?rss&' + urllib.urlencode(q)
            xml = urlfetch.fetch(url).content
            #print >>sys.stderr, xml
            entry = BeautifulSoup(xml)('entry')
            for en in entry:
                tmp_answer = en('summary')[0](text=True)[0].split(' : ')[1].rstrip()
                regexp = re.compile('tam_bot')
                if regexp.search(en('summary')[0](text=True)[0]) is None:  # ignore tam_bot's own posts
                    regexp1 = re.compile('@')
                    if regexp1.search(tmp_answer) is None and tmp_answer != '' and search_text.rstrip() != tmp_answer.rstrip().encode('utf-8'):
                        answer = tmp_answer
                        #print >>sys.stderr, 'tmp_answer=%s' % tmp_answer
                        break
            post = '@%s %s' % (from_user, answer)

        ######### when no answer is found at all ##############
        if answer == '':
            post = u'@%s %s' % (from_user, unicode(search_text, 'utf-8'))

        #print >>sys.stderr, 'post=%s' % post
        Twitter(post)
        return render_to_response('outanswer.html', {'search_text': search_text, 'post': post})
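# A note on the BeautifulSoup 3 idiom above: calling a soup or a tag is
# shorthand for findAll(), so against a made-up entry:
#
#   >>> from BeautifulSoup import BeautifulSoup
#   >>> s = BeautifulSoup('<entry><summary>a@alice : b@bob hi</summary></entry>')
#   >>> s('entry')[0]('summary')[0](text=True)[0]
#   u'a@alice : b@bob hi'
#
# en('summary')[0](text=True)[0] is therefore the entry's text, and the two
# screen names are picked out of tokens 0 and 2 by splitting on '@'.
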
# Morphologically analyse the question and extract keywords
def GetKeitaiso_mecapi(question):
    question_word = ''
    word_rank = dict()
    i = 0
    q = {'format': 'xml', 'sentence': question, 'response': '', 'filter': 'uniq'}
    url = 'http://mimitako.net/api/mecapi.cgi?' + urllib.urlencode(q)
    xml_question = urlfetch.fetch(url).content
    #print >>sys.stderr, BeautifulSoup(xml_question)('mecabresult')
    ngword = [u'何', u'誰']  # stop words: "what", "who"
    for mlist in BeautifulSoup(xml_question)('mecabresult'):
        for m in mlist('word'):
            term = m('feature')[0].contents[0].split(',')
            word = m('surface')[0].contents[0]  # the text node, so tr() below walks characters (the original passed the tag itself)
            # map every punctuation character to a space, then strip the spaces
            # (the original passed a one-character repl, so only '!' was translated)
            punct = u'!"#$%&()=-^~\|[]{}@:`*;+,.<>/\?_。、・'
            word = tr(punct, u' ' * len(punct), word)
            word = string.replace(word, ' ', '')
            #print >>sys.stderr, '%s %s %s %s %s %s %s' % (term[0], term[1], term[2], term[3], term[4], term[5], word)
            # keep nouns, verbs and adjectives that are not stop words
            if (term[0] == u'名詞' or term[0] == u'動詞' or term[0] == u'形容詞') and word != '' and word not in ngword:
                cntwd = int(countWord(word))
                #print >>sys.stderr, '%s %s %s' % (term[0], word, cntwd)
                # at most three words, and only reasonably rare ones
                if i < 3 and cntwd < 100000000:
                    question_word = u'%s %s' % (question_word, word)
                    print >>sys.stderr, '%s %s %s' % (term[0], word, cntwd)
                    i = i + 1
    return question_word

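# In short: the mimitako.net MeCab API tags every word, and the loop keeps at
# most three nouns/verbs/adjectives that survive the punctuation strip, are
# not stop words, and have fewer than 1e8 Google hits (rare enough to be
# discriminative).  For u'犬は何を食べますか' the result would look something
# like u' 犬 食べ' (illustrative only, not the output of a real run).
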
# Post a status update to Twitter
def Twitter(post):
    # basic auth
    username = 'username'
    password = 'password'
    url = 'http://twitter.com/statuses/update.json'
    payload = urllib.urlencode({"status": post.encode("utf-8")})
    base64string = b64encode("%s:%s" % (username, password))
    headers = {"Authorization": "Basic %s" % base64string}
    json_result = urlfetch.fetch(url, payload=payload, method=urlfetch.POST, headers=headers).content
    return 1

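# Example call: Twitter(u'@someone hello').  This is the Basic-auth
# statuses/update.json endpoint Twitter offered at the time; it has since
# been retired in favour of OAuth, so the call would need rewriting today.
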
# Fetch the most recent question addressed to the bot from Twitter
def GetresponseTwitter():
    # basic auth
    username = 'username'
    password = 'password'

    url = 'http://twitter.com/statuses/replies.xml'
    base64string = b64encode("%s:%s" % (username, password))
    headers = {"Authorization": "Basic %s" % base64string}
    xml_result = urlfetch.fetch(url, payload='', method=urlfetch.POST, headers=headers).content
    result_comment = htmlentity2unicode(BeautifulSoup(xml_result)('text')[0].contents[0].split(' ')[1])
    from_user = BeautifulSoup(xml_result)('screen_name')[0].contents[0]
    return result_comment, from_user

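# replies.xml is the replies timeline: the code keeps only the first status
# in the feed, takes the single token right after the leading "@bot" mention
# as the question, and decodes character references with htmlentity2unicode().
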
# Look up the Google hit count for a word
def countWord(word):
    q = {
        #'q': '\"' + question + '\"',
        'q': word.encode('utf-8'),
        'v': 1.0,
        'hl': 'ja',
        'rsz': 'small',
    }
    url = u'http://ajax.googleapis.com/ajax/services/search/web?' + urllib.urlencode(q)
    json_result = urlfetch.fetch(url).content
    json_result = string.replace(json_result, 'null', '"null"')
    try:
        dict_result = eval('dict(' + json_result + ')')  # crude JSON parsing; see the note below
        return dict_result['responseData']['cursor']['estimatedResultCount']
    except:
        return 0

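# The eval('dict(...)') above is a fragile stand-in for a JSON parser (hence
# the null -> "null" rewrite first).  A safer sketch using the simplejson
# bundled with Django on App Engine would be:
#
#   from django.utils import simplejson
#   dict_result = simplejson.loads(json_result)
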
def tr(pattern, repl, string):
    # translate characters of `pattern` to the corresponding ones in `repl`
    # (note: the parameter shadows the `string` module, but only locally)
    m = dict(zip(pattern, repl))
    return ''.join((m.get(c, c) for c in string))

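# tr() behaves like a tiny Unix tr(1); for example:
#
#   >>> tr(u'ab', u'xy', u'cabbage')
#   u'cxyyxge'
#
# pattern and repl should be the same length, since zip() stops at the shorter.
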
# Numeric character references / entity references -> characters
def htmlentity2unicode(text):
    # compile the regexes
    reference_regex = re.compile(u'&(#x?[0-9a-f]+|[a-z]+);', re.IGNORECASE)
    num16_regex = re.compile(u'#x[0-9a-f]+', re.IGNORECASE)  # hex digits, not just 0-9 as in the original
    num10_regex = re.compile(u'#\d+', re.IGNORECASE)

    result = u''
    i = 0
    while True:
        # find the next entity or character reference
        match = reference_regex.search(text, i)
        if match is None:
            result += text[i:]
            break

        result += text[i:match.start()]
        i = match.end()
        name = match.group(1)

        # entity reference (&amp; and friends)
        if name in htmlentitydefs.name2codepoint:
            result += unichr(htmlentitydefs.name2codepoint[name])
        # numeric character reference
        elif num16_regex.match(name):
            # hexadecimal
            result += unichr(int(u'0' + name[1:], 16))
        elif num10_regex.match(name):
            # decimal
            result += unichr(int(name[1:]))

    return result
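
As a quick check of the decoder, all three reference forms decode as expected (U+3042 is あ):

    >>> htmlentity2unicode(u'&amp; &#x3042; &#12354;')
    u'& \u3042 \u3042'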

URL: http://www.team-lab.com/news/index.php?itemid=469
