import urllib.parse
query = 'Hellö Wörld@Python'
# Percent-encode the text: non-ASCII characters are UTF-8 encoded first, and
# characters such as ' ' and '@' are replaced by %XX escapes.
encoded = urllib.parse.quote(query)
print(encoded)
# Output: Hell%C3%B6%20W%C3%B6rld%40Python
import urllib.request
import urllib.parse
import re
url = 'https://www.geeksforgeeks.org/'
values = {
    's': 'python programming',
    'submit': 'search',
}
# Form-encode the fields and convert them to bytes; supplying a data payload
# makes urllib send the request as a POST.
data = urllib.parse.urlencode(values)
data = data.encode('utf-8')
req = urllib.request.Request(url, data)
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as resp:
    respData = resp.read()
    # Charset announced by the server, falling back to UTF-8 when absent.
    charset = resp.headers.get_content_charset() or 'utf-8'
# Decode the body instead of calling str() on the bytes: str(bytes) yields
# the "b'...'" repr, with newlines and non-ASCII bytes shown as escapes.
html = respData.decode(charset, errors='replace')
# DOTALL keeps multi-line paragraphs matching now that newlines are real
# characters rather than escaped text.
paragraphs = re.findall(r'<p>(.*?)</p>', html, re.DOTALL)
for eachP in paragraphs:
    print(eachP)
## amazon.py
# Search keywords; the spider issues one search request per entry.
queries = ['tshirt for men', 'tshirt for women']
class AmazonSpider(scrapy.Spider):
    """Spider that seeds one Amazon search request per entry in ``queries``."""

    def start_requests(self):
        """Yield a search-results request for every keyword in ``queries``.

        Each request is created with ``self.parse_keyword_response`` as its
        callback.
        """
        # NOTE(review): `scrapy` must be imported by this module; that import
        # is not visible in this part of the file.
        for query in queries:
            # urlencode escapes spaces and reserved characters in the keyword
            # so it is safe to place in the query string.
            params = urllib.parse.urlencode({'k': query})
            url = 'https://www.amazon.com/s?' + params
            yield scrapy.Request(url=url, callback=self.parse_keyword_response)