Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

Scrapy ItemLoader example

def parse_item(self, response):
    """Turn a JSON response into a ``HeatMapItem`` and yield it.

    The serial for the page is looked up in ``self.all_urls`` by
    ``response.url``. The response body is decoded as JSON; its ``'data'``
    and ``'nt'`` entries are moved into the item's ``data`` and
    ``timestamp`` fields, and whatever remains of the payload is stored
    under ``others``. Before the item is yielded, the serial is recorded in
    ``self.finished`` and ``self.claim_completeness()`` is called.

    Args:
        response: The downloaded response; ``response.body`` must hold a
            JSON object.

    Yields:
        The item produced by ``ItemLoader.load_item()``.

    Raises:
        KeyError: If ``response.url`` is not in ``self.all_urls`` or the
            payload has no ``'data'`` / ``'nt'`` key.
        ValueError: If the body is not valid JSON.
    """
    url = response.url
    serial = self.all_urls[url]
    # Pass arguments to the logger instead of pre-formatting the message,
    # so the string is only built when the record is actually emitted.
    self.logger.info("Trying page %s %s", serial, url)

    payload = json.loads(response.body)

    loader = ItemLoader(item=HeatMapItem(), response=response)
    # Hour bucket in local time, e.g. "2024010113".
    loader.add_value('cur_hour', time.strftime("%Y%m%d%H"))
    loader.add_value('serial', serial)
    # pop() removes the key, so 'others' below only receives the leftovers.
    loader.add_value('data', payload.pop('data'))
    loader.add_value('timestamp', payload.pop('nt'))
    loader.add_value('others', payload)
    loader.add_value('url', url)
    loader.add_value('is_parsed', 0)

    self.finished.add(serial)
    self.logger.info(u"Crawling %s, %s successfully. :)", serial, url)
    self.claim_completeness()
    yield loader.load_item()
    # NOTE(review): a commented-out failure branch used to follow here. It
    # compared the payload's "data" against a fixed (unicode-escaped)
    # message and, on a match, logged the request's cookies as banned,
    # removed them from self.cookies and yielded
    # {"BannedCookieToday": cookie}; otherwise it yielded {} and logged the
    # crawl as failed. It had no matching `if` and relied on Python 2
    # str.decode, so it was dropped rather than revived - confirm whether
    # ban detection is still needed before reintroducing it.
 
PREVIOUS NEXT
Tagged: #scrapy #itemloader
ADD COMMENT
Topic
Name
1+5 =