import re

# Compiled once at import time. DOTALL lets "." match newlines, so a tag
# whose attributes wrap across several lines is still removed as one unit.
_TAG_PATTERN = re.compile(r'<.*?>', re.DOTALL)


def cleanhtml(raw_html):
    """Return *raw_html* with every ``<...>`` span removed.

    This is a purely textual, regex-based strip: each shortest run of
    characters starting at ``<`` and ending at the next ``>`` is deleted.
    The markup is not parsed, so character references such as ``&amp;``
    are left untouched, and a literal ``<`` followed later by ``>`` in
    ordinary text is removed as if it were a tag.

    Args:
        raw_html: The string to strip tags from.

    Returns:
        The input string with all ``<...>`` spans replaced by nothing.
    """
    return _TAG_PATTERN.sub('', raw_html)
from bs4 import BeautifulSoup

# Parser-based alternative to regex stripping: build a BeautifulSoup tree
# from the markup using the "lxml" parser and read its .text attribute.
# NOTE(review): raw_html is not defined at module scope in the visible
# source; it must already be bound to the HTML string before this runs.
# NOTE(review): the "lxml" parser presumably requires the lxml package to
# be installed alongside bs4 -- confirm in the deployment environment.
cleantext = BeautifulSoup(raw_html, "lxml").text