import requests
from bs4 import BeautifulSoup
import re
import base64
import json
# Script: log in to a WSJ account through the Dow Jones SSO endpoints and
# print the first name found on an article page fetched with the
# authenticated session.
#
# Flow:
#   1. GET the login page; pull the Base64-encoded client configuration and
#      the URL of the "app-min" JavaScript bundle out of it.
#   2. GET the bundle and extract the "connection" name from it.
#   3. POST the credentials to the username/password login endpoint.
#   4. Re-POST the form fields returned by step 3 to the login callback.
#   5. GET an article and read "firstName" from the page source.

# Placeholder credentials -- replace with real values before running.
username = "your_email@gmail.com"
password = "your_password"
base_url = "https://accounts.wsj.com"


def _first_group(pattern, text, description):
    """Return group 1 of the first match of *pattern* in *text*.

    Matching is case-insensitive. Raises RuntimeError naming *description*
    when nothing matches, instead of failing later with an AttributeError
    on ``None.group``.
    """
    match = re.search(pattern, text, re.IGNORECASE)
    if match is None:
        raise RuntimeError("could not find {} in response".format(description))
    return match.group(1)


# One session for the whole flow so cookies set by each step are sent along
# with the following requests.
session = requests.Session()

# --- Step 1: login page -----------------------------------------------------
r = session.get("{}/login".format(base_url))
soup = BeautifulSoup(r.text, "html.parser")

# First <script> whose src mentions "app-min".
jscript = next(
    (
        t.get("src")
        for t in soup.find_all("script")
        if t.get("src") is not None and "app-min" in t.get("src")
    ),
    None,
)
if jscript is None:
    raise RuntimeError("could not find the app-min script on the login page")

# The page embeds its configuration as Base64.decode('<payload>'). The dot and
# the opening parenthesis are regex metacharacters and must be escaped; the
# payload is everything up to the closing quote.
base64_decoded = base64.b64decode(
    _first_group(r"Base64\.decode\('([^']+)'", r.text, "Base64 credentials")
)
credentials = json.loads(base64_decoded)
print("client_id : {}".format(credentials["clientID"]))
print("state : {}".format(credentials["internalOptions"]["state"]))
print("nonce : {}".format(credentials["internalOptions"]["nonce"]))
print("scope : {}".format(credentials["internalOptions"]["scope"]))

# --- Step 2: connection name from the JavaScript bundle ----------------------
r = session.get("{}{}".format(base_url, jscript))
connection = _first_group(r'connection:\s*"(\w+)"', r.text, "connection name")

# --- Step 3: submit username and password ------------------------------------
r = session.post(
    "https://sso.accounts.dowjones.com/usernamepassword/login",
    data={
        "username": username,
        "password": password,
        "connection": connection,
        "client_id": credentials["clientID"],
        "state": credentials["internalOptions"]["state"],
        "nonce": credentials["internalOptions"]["nonce"],
        "scope": credentials["internalOptions"]["scope"],
        "tenant": "sso",
        "response_type": "code",
        "protocol": "oauth2",
        "redirect_uri": "https://accounts.wsj.com/auth/sso/login",
    },
)

# --- Step 4: replay the returned form to the callback ------------------------
# Every named <input> of the response is collected and posted back as-is.
# NOTE(review): presumably these are the hidden fields of an auto-submit
# form -- confirm against a live response.
soup = BeautifulSoup(r.text, "html.parser")
login_result = {
    t.get("name"): t.get("value")
    for t in soup.find_all("input")
    if t.get("name") is not None
}
r = session.post(
    "https://sso.accounts.dowjones.com/login/callback",
    data=login_result,
)

# --- Step 5: check connected user --------------------------------------------
r = session.get("https://www.wsj.com/articles/singapore-prime-minister-lee-rejects-claims-he-misused-state-powers-in-family-feud-1499094761?tesla=y")
print(
    "connected user : "
    + _first_group(r'"firstName":\s*"(\w+)",', r.text, "connected user's first name")
)