파이썬 스트리밍 인코딩문제점.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tweepy
import json
import re
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class listener(StreamListener):
def on_data(self, data):
try:
print data
tweet = data.split(',"text":"')[1].split('","source')[0]
print tweet
saveThis = str(time.time())+'::' + tweet
saveFile = open("tweetDB3.csv", "a")
saveFile.write(saveThis)
saveFile.write("\n")
saveFile.close()
return True
except BaseException, e:
print "failed ondata,",str(e)
time.sleep(5)
def on_error(self, status):
print status
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track = ['오늘'])
1465042178.01::RT @BTS_twt: korea#\ud83c\uddf0\ud83c\uddf7 https:\/\/t.co\/zwKaGo4Lcj
1465042181.76::RT @wdfrog: \ud5e4\ub7f4\ub4dc \uacbd\uc81c\uac00 \uc774\ubc88 \uc77c\ub85c \uc0ac\uacfc\ubb38\uc744 \uc62c\ub838\uc9c0\ub9cc \uc774\uc790\ub4e4\uc740 \ubd88\uacfc 3\uac1c\uc6d4 \uc804\uc778 3\uc6d4 4\uc77c\uc5d0\ub3c4 \uc55e\uc73c\ub85c \uc870\uc2ec\ud558\uaca0\ub2e4\ub294 \uc0ac\uacfc\ubb38\uc744 \uc62c\ub9b0 \ubc14 \uc788\ub2e4. \uc77c\uc774 \ucee4\uc9c8\uae4c \uba74\ud53c\ud558\ub294 \uac83\uc774\ub2c8 \uc5b8\ub860\uc911\uc7ac\uc704\uc5d0 \ud55c\uce35 \uac00\uc5f4\ucc28\uac8c \ubbfc\uc6d0\uc744 \ub123\uc74d\uc2dc\ub2e4\nhttps:\/\/t.co\/Wb\u2026
트위터 API 스트림을 했을때 예를들면
이런식으로 엑셀파일이 형성됩니다.
유니코드를 자동으로 한글로 변환시키는게 가능한가요?
파이썬 코드를 바꿔보려해도 안되고 엑셀에서 바꾸려해도안되네요.
부탁드립니다.
댓글 달기