Return to Snippet

Revision: 11801
at February 19, 2009 08:09 by tamuratetsuya


Initial Code
#!-*- coding:utf-8 -*-
import sys
import os
import string
import urllib
import urllib2
import math
import htmlentitydefs
import re
from django.shortcuts import render_to_response
from google.appengine.api import users
from django.http import HttpResponseRedirect
from BeautifulSoup import BeautifulSoup
from google.appengine.api import urlfetch
from base64 import b64encode


# Landing page
def Mainpage(request):
    """Render the question input form template (``input.html``)."""
    template_name = 'input.html'
    return render_to_response(template_name)


# Helper: look for an answer among replies to tweets that match the question.
def _answer_from_replies(search_text):
    """Search for the question text itself and return a later reply, or ''.

    For every search hit whose summary does not mention the bot, the first
    and third space-separated tokens are split on '@' to get two user names
    (presumably the summary looks like "@asker : @addressee ..." -- confirm
    against the search service).  Tweets from the addressee mentioning the
    asker are then fetched, and the fourth space-separated token of the first
    one that is newer than the hit, non-empty and free of '@' is returned.
    """
    url = ('http://pcod.no-ip.org/yats/search?%s&rss'
           % urllib.urlencode({'query': search_text}))
    entries = BeautifulSoup(urlfetch.fetch(url).content)('entry')
    bot_name = re.compile('username')
    mention = re.compile('@')
    for en in entries:
        try:
            summary = en('summary')[0](text=True)[0]
            asked_at = en('updated')[0](text=True)[0]
            if bot_name.search(summary) is not None:
                continue  # ignore the bot's own tweets
            words = summary.split(' ')
            from_ = words[0].split('@')[1]
            to_ = words[2].split('@')[1]
            reply_url = ('http://pcod.no-ip.org/yats/search?query=user:'
                         + to_ + '%20%40' + from_ + '%20&lang=ja&rss')
            replies = BeautifulSoup(urlfetch.fetch(reply_url).content)('entry')
            for reply in replies:
                replied_at = reply('updated')[0](text=True)[0]
                # Timestamps are compared as strings, exactly as delivered.
                if not replied_at > asked_at:
                    continue
                tokens = reply('summary')[0](text=True)[0].split(' ')
                if len(tokens) < 4:
                    continue  # too short to carry an answer token
                candidate = tokens[3]
                if candidate != '' and mention.search(candidate) is None:
                    print >>sys.stderr, 'answer=%s' % candidate
                    return candidate  # the first usable reply wins
        except Exception:
            # Best effort: a malformed hit or a failed fetch only skips this
            # entry, but the reason is reported instead of being discarded.
            print >>sys.stderr, 'getAnswer: skipping entry: %r' % (sys.exc_info()[1],)
            continue
    return ''


# Helper: look for an answer by searching with keywords from the question.
def _answer_from_keywords(search_text):
    """Search with the keywords of the question and return a hit text, or ''.

    The keywords come from ``GetKeitaiso_mecapi``.  For each hit, the text
    after the first ' : ' separator is the candidate; it is accepted when the
    hit does not mention 'tam_bot', the candidate is non-empty, contains no
    '@' and is not simply the question itself.
    """
    question_word = GetKeitaiso_mecapi(search_text).encode('utf-8')
    q = {'query': question_word, 'lang': 'ja'}
    url = 'http://pcod.no-ip.org/yats/search?rss&' + urllib.urlencode(q)
    entries = BeautifulSoup(urlfetch.fetch(url).content)('entry')
    bot_name = re.compile('tam_bot')
    mention = re.compile('@')
    question = search_text.rstrip()
    for en in entries:
        summary = en('summary')[0](text=True)[0]
        if bot_name.search(summary) is not None:
            continue  # ignore tam_bot's own tweets
        parts = summary.split(' : ')
        if len(parts) < 2:
            continue  # no ' : ' separator, nothing to extract
        candidate = parts[1].rstrip()
        if (candidate != '' and mention.search(candidate) is None
                and question != candidate.encode('utf-8')):
            return candidate
    return ''


# Take a question, work out an answer and tweet it.
def getAnswer(request):
    """Answer a question, post the reply to Twitter and render the result.

    The question is read from the ``query`` POST field.  When that field is
    missing or empty, ``GetresponseTwitter()`` supplies the question and the
    user to reply to.

    The answer is looked up in three steps:
      1. a reply found by searching for the question text itself,
      2. otherwise a hit found by searching for keywords of the question,
      3. otherwise the question is echoed back.

    When no question text is available at all, nothing is posted and the
    page is rendered with an empty ``post``.

    Returns the rendered ``outanswer.html`` response with ``search_text``
    and ``post`` in its context.
    """
    search_text = ''
    from_user = 'username'
    try:
        search_text = request.POST['query'].encode('utf-8')
    except (KeyError, UnicodeError):
        # No usable form field: fall back to the Twitter replies below.
        pass
    if search_text == '':
        search_text, from_user = GetresponseTwitter()
        search_text = search_text.encode('utf-8')

    post = ''
    if search_text != '':
        answer = _answer_from_replies(search_text)
        if answer == '':
            # The sentence alone gave no answer: retry with its keywords.
            answer = _answer_from_keywords(search_text)
        if answer != '':
            post = '@%s %s' % (from_user, answer)
        else:
            # Still nothing: echo the question back.
            post = u'@%s %s' % (from_user, unicode(search_text, 'utf-8'))
        Twitter(post)
    return render_to_response('outanswer.html',
                              {'search_text': search_text, 'post': post})

    
# Run morphological analysis on the question and pick out keywords.
def GetKeitaiso_mecapi(question, max_words=3, max_hits=100000000):
    """Extract search keywords from ``question`` via the mecapi web service.

    Each ``word`` element of the service response is considered in order.
    A word is kept when the first field of its ``feature`` is noun, verb or
    adjective, it is not empty after punctuation removal, it is not in the
    stop list, and ``countWord`` reports fewer than ``max_hits`` results.

    Collection stops as soon as ``max_words`` keywords were found, so no
    further ``countWord`` requests are made for the remaining words.

    Args:
        question: the sentence to analyse (passed to ``urllib.urlencode``).
        max_words: maximum number of keywords to return (default 3).
        max_hits: words with at least this many search results are
            considered too common and are skipped (default 100000000).

    Returns:
        The keywords as one string, each preceded by a single space
        (for example u' foo bar'), or '' when no keyword qualified.
    """
    q = {'format': 'xml', 'sentence': question, 'response': '', 'filter': 'uniq'}
    url = 'http://mimitako.net/api/mecapi.cgi?' + urllib.urlencode(q)
    xml_question = urlfetch.fetch(url).content
    ngword = [u'何', u'誰']  # stop words: "what", "who"
    content_pos = (u'名詞', u'動詞', u'形容詞')  # noun, verb, adjective
    question_word = ''
    picked = 0
    for mlist in BeautifulSoup(xml_question)('mecabresult'):
        for m in mlist('word'):
            if picked >= max_words:
                # Enough keywords: skip the remaining (networked) lookups.
                return question_word
            term = m('feature')[0].contents[0].split(',')
            word = m('surface')[0]
            # Blank out punctuation, then drop the blanks.
            word = tr(u'!"#$%&()=-^~\|[]{}@:`*;+,.<>/\?_。、・', u'                                   ', word)
            word = word.replace(' ', '')
            if term[0] not in content_pos or word == '' or word in ngword:
                continue
            cntwd = int(countWord(word))
            if cntwd < max_hits:
                question_word = u'%s %s' % (question_word, word)
                print >>sys.stderr, '%s %s %s' % (term[0], word, cntwd)
                picked = picked + 1
    return question_word

# Post a status update to Twitter.
def Twitter(post):
    """Send ``post`` as a status update using HTTP Basic authentication.

    ``post`` is encoded as UTF-8 and form-encoded as the ``status`` field of
    a POST request.  The response body is read but not inspected.

    Returns:
        Always 1.
    """
    # basic auth
    username = 'username'
    password = 'password'
    form_body = urllib.urlencode({"status": post.encode("utf-8")})
    credentials = b64encode("%s:%s" % (username, password))
    request_headers = {"Authorization": "Basic %s" % credentials}
    # The body is fetched for parity with the request, but is not used.
    response_body = urlfetch.fetch(
        'http://twitter.com/statuses/update.json',
        payload=form_body,
        method=urlfetch.POST,
        headers=request_headers).content
    return 1


# Fetch the most recent question sent to the account from Twitter.
def GetresponseTwitter():
    """Fetch ``replies.xml`` and return the first reply's question and sender.

    The request uses HTTP Basic authentication.  The question is the second
    space-separated token of the first ``text`` element, with HTML entities
    decoded by ``htmlentity2unicode``; the sender is the content of the first
    ``screen_name`` element.

    Returns:
        A ``(result_comment, from_user)`` tuple.
    """
    # basic auth
    username = 'username'
    password = 'password'

    auth_token = b64encode("%s:%s" % (username, password))
    xml_result = urlfetch.fetch(
        'http://twitter.com/statuses/replies.xml',
        payload='',
        method=urlfetch.POST,
        headers={"Authorization": "Basic %s" % auth_token}).content
    soup = BeautifulSoup(xml_result)
    first_text = soup('text')[0].contents[0]
    result_comment = htmlentity2unicode(first_text.split(' ')[1])
    from_user = soup('screen_name')[0].contents[0]
    return result_comment, from_user


# Look up the number of Google search results for a word.
def countWord(word):
    """Return the estimated number of Google web search results for ``word``.

    Args:
        word: the search term; it is encoded as UTF-8 for the query string.

    Returns:
        The ``estimatedResultCount`` value found under
        ``responseData.cursor`` in the JSON response, as delivered by the
        service, or 0 when the response cannot be parsed or has no such
        value.
    """
    # Local import keeps the block self-contained.  ``json`` is in the
    # standard library from Python 2.6; older runtimes fall back to the
    # simplejson copy shipped with Django, which this file already uses.
    try:
        import json
    except ImportError:
        from django.utils import simplejson as json

    q = {
        'q': word.encode('utf-8'),
        'v': 1.0,
        'hl': 'ja',
        'rsz': 'small'
    }
    url = u'http://ajax.googleapis.com/ajax/services/search/web?' + urllib.urlencode(q)
    json_result = urlfetch.fetch(url).content
    try:
        # SECURITY: the body comes from the network, so it is parsed as data.
        # The previous implementation passed it to eval(), which would run
        # whatever the remote end (or anyone tampering with the plain-HTTP
        # response) sent back.
        count = json.loads(json_result)['responseData']['cursor']['estimatedResultCount']
    except (ValueError, KeyError, TypeError):
        # Malformed JSON, missing keys, or a null ``responseData``.
        return 0
    if count is None:
        return 0
    return count


def tr(pattern, repl, string):
    """Translate items of ``string`` like the Unix ``tr`` command.

    The i-th item of ``pattern`` is mapped to the i-th item of ``repl``;
    surplus items of the longer argument are ignored, and a later duplicate
    in ``pattern`` overrides an earlier one.  Items of ``string`` without a
    mapping are kept unchanged.

    Returns:
        The translated items joined into one string.
    """
    table = {}
    for source, target in zip(pattern, repl):
        table[source] = target
    translated = []
    for item in string:
        translated.append(table.get(item, item))
    return ''.join(translated)



# Helper: resolve one reference name (the part between '&' and ';').
def _decode_reference(name):
    """Return the character for ``name``, or None when it cannot be resolved.

    ``name`` is either a hexadecimal reference ('#x41'), a decimal reference
    ('#65') or an entity name ('amp').
    """
    try:
        if name[:2].lower() == u'#x':
            return unichr(int(name[2:], 16))
        if name[:1] == u'#':
            return unichr(int(name[1:]))
        return unichr(htmlentitydefs.name2codepoint[name])
    except (KeyError, ValueError, OverflowError):
        # Unknown entity name, or a code point outside the supported range.
        return None


# Convert character references and named entities to characters.
def htmlentity2unicode(text):
    """Replace HTML entities and numeric character references in ``text``.

    Handles named entities known to ``htmlentitydefs`` (``&amp;``,
    ``&frac12;``), decimal references (``&#65;``) and hexadecimal references
    (``&#x41;``, ``&#xa9;``).  A reference that cannot be resolved is left in
    the output exactly as written, so no input text is ever lost.

    Args:
        text: the string to decode.

    Returns:
        The decoded text as a unicode string.
    """
    reference_regex = re.compile(
        u'&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);', re.IGNORECASE)

    parts = []
    pos = 0
    for match in reference_regex.finditer(text):
        parts.append(text[pos:match.start()])
        char = _decode_reference(match.group(1))
        if char is None:
            char = match.group(0)  # keep the unresolved reference verbatim
        parts.append(char)
        pos = match.end()
    parts.append(text[pos:])
    return u''.join(parts)

Initial URL
http://www.team-lab.com/news/index.php?itemid=469

Initial Description
アルゴリズムコンテストの勉強会で紹介したGoogle App Engineで動くpythonコードです
Djangoフレームワークを前提にしています。
あまりコードとしては参考にならないかも知れませんが、これだけでも簡単な人工無脳ができるんですね

Initial Title
人工無脳のソース アルゴリズムコンテスト勉強会にて

Initial Tags
python

Initial Language
Python