# Import the required Module
import tabula
# Run tabula over every page of the PDF and keep the first element of
# the returned collection.
tables = tabula.read_pdf("IPLmatch.pdf", pages="all")
df = tables[0]

# Independently of the read above, have tabula write the contents of the
# same PDF (all pages) out to a CSV file.
tabula.convert_into(
    "IPLmatch.pdf",
    "iplmatch.csv",
    output_format="csv",
    pages="all",
)

# Display the table selected earlier.
print(df)
# Shell command (run in a terminal, not inside Python):
#   pip install git+https://github.com/pdftables/python-pdftables-api.git
# 1. Download and install Java.
# 2. Install the Python library 'tabula-py' using pip:
#    pip install tabula-py   (run in a terminal, not inside Python)
# If this is your first time installing Java and tabula-py,
# add your Java installation folder to the PATH environment variable.
# If you don't, you'll get the following error message:
#   tabula.errors.JavaNotFoundError: `java` command is not found from this
#   Python process.Please ensure Java is installed and PATH is set for `java`
# 3. Import tabula and run its read function on the desired page of the PDF file.
import tabula
# Read only page 3 with lattice mode switched on, then keep the second
# element (index 1) of whatever tabula returns for that page.
page_tables = tabula.read_pdf("data.pdf", pages=3, lattice=True)
df = page_tables[1]