def parse_item(self, response):
    """Parse one heat-map JSON response and yield a populated item.

    The response body is expected to be a JSON object containing at least
    the keys ``data`` and ``nt`` (timestamp); everything else is stored
    under ``others``.  The URL's serial index is looked up in
    ``self.all_urls``, and the index is recorded in ``self.finished`` so
    ``claim_completeness()`` can report crawl progress.

    :param response: scrapy Response whose body is a JSON document.
    :yields: the loaded ``HeatMapItem``.
    """
    url = response.url
    item_idx = self.all_urls[url]
    # Lazy %-args: formatting is deferred until the record is actually emitted.
    self.logger.info("Trying page %s %s", item_idx, url)

    resp_dct = json.loads(response.body)

    loader = ItemLoader(item=HeatMapItem(), response=response)
    current_hour = time.strftime("%Y%m%d%H", time.localtime())
    loader.add_value('cur_hour', current_hour)
    loader.add_value('serial', item_idx)
    # pop() removes the big payload keys so 'others' keeps only the leftovers.
    loader.add_value('data', resp_dct.pop('data'))
    loader.add_value('timestamp', resp_dct.pop('nt'))
    loader.add_value('others', resp_dct)
    loader.add_value('url', url)
    loader.add_value('is_parsed', 0)

    self.finished.add(item_idx)
    self.logger.info(u"Crawling %s, %s successfully. :)", item_idx, url)
    self.claim_completeness()
    yield loader.load_item()
    # NOTE(review): earlier revisions handled a banned-cookie response here
    # (server returns a "too many visits" message in 'data'); restore from
    # version control if that error path is needed again.