Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

scrapy itemloader example

def parse_item(self, response):

        loader = ItemLoader(GaokaopaiZhuanyeItem(), response)

        loader.add_value('url', response.url)
        loader.add_css('name', u'.majorTitle>h1::text')

        loader.add_xpath('code', u'//div[@class="majorBase"]/h3[starts-with(., "?????")]/text()', re=ur'?(.+)')
        loader.add_xpath('degree', u'//div[@class="majorBase"]/h3[starts-with(., "?????")]/text()', re=ur'?(.+)')
        loader.add_xpath('period', u'//div[@class="majorBase"]/h3[starts-with(., "?????")]/text()', re=ur'?(.+)')
        loader.add_xpath('courses', u'//div[@class="course"]/h3[.="?????"]/following-sibling::p/text()')

        def parse_related():

            for e in response.xpath(u'//div[@class="course"]/h3[.="?????"]/following-sibling::a'):
                yield {
                    'url': e.css('::attr(href)').extract_first(),
                    'code': e.css('::attr(href)').re_first(ur'-([^-]+).html'),
                    'name': e.css('::text').extract_first(),
                }

        loader.add_value('related', list(parse_related()))

        def parse_category():

            category = []

            for i in [u"????", u"????", u"????"]:
                x = u'//h3[.="{}"]/following-sibling::ul[1]/li[@class="current"]/a'.format(i)
                e = response.xpath(x)
                category.append({
                    'url': e.css('::attr(href)').extract_first(),
                    'code': e.css('::attr(href)').re_first(ur'/zhuanye([-0-9]*).html').strip('-'),
                    'name': e.css('::text').extract_first(),
                })

            return category

        loader.add_value('category', parse_category())
        loader.add_css('detail', u'.majorCon')

        item = loader.load_item()

        return Request(
            url='http://www.gaokaopai.com/zhuanye-jiuye-{}.html'.format(item['code'][0]),
            meta={'item': item},
            callback=self.parse_jiuye
        ) 
 
PREVIOUS NEXT
Tagged: #scrapy #itemloader
ADD COMMENT
Topic
Name
2+7 =