# Compress an in-memory bytes object with gzip
import gzip

s_in = b"some random text"
# Level 9 is what gzip.compress uses by default; it is spelled out here.
s_out = gzip.compress(s_in, compresslevel=9)
# Round-trip: compress bytes with gzip, then decompress them again
import gzip

s_in = b"some random text"
s_out = gzip.compress(s_in, compresslevel=9)  # 9 is the default level
# First line printed: the compressed stream; second line: the restored payload.
print(s_out, gzip.decompress(s_out), sep="\n")
# Compress a file on disk into a .gz archive
import gzip
import shutil

# One with-statement manages both handles; the data is streamed through
# shutil.copyfileobj rather than read into memory in one piece.
with open('/mypath/file.txt', 'rb') as f_in, gzip.open('/mypath/file.txt.gz', 'wb') as f_out:
    shutil.copyfileobj(f_in, f_out)
# Decompress a .gz archive back into a plain file
import gzip
import shutil

# gzip.open in 'rb' mode yields the decompressed bytes, which are streamed
# straight into the output file; both handles are closed on exit.
with gzip.open('file.txt.gz', 'rb') as f_in, open('file.txt', 'wb') as f_out:
    shutil.copyfileobj(f_in, f_out)
# Compress a file from the command line via the gzip module's CLI (shell commands, not Python)
python3.10 -m gzip --fast test.txt # fastest method, least compression
python3.10 -m gzip --best test.txt # slowest method, best compression
# Decompress a .gz file from the command line (-d selects decompression)
python3.10 -m gzip -d test.txt.gz
# Compress a file on disk by writing its lines into a gzip stream
import gzip

# The source must be read in binary mode: a gzip stream opened with 'wb'
# accepts only bytes, so lines read in text mode (str) raise TypeError.
# The with-statements close both files even when the copy fails part-way;
# the gzip stream is closed first, then the source file.
with open('/home/joe/file.txt', 'rb') as f_in:
    with gzip.open('/home/joe/file.txt.gz', 'wb') as f_out:
        f_out.writelines(f_in)
# Compress a file on disk by streaming it with shutil.copyfileobj
import gzip
import shutil

# Source and gzip target are managed by a single with-statement, so both
# are closed as soon as the copy finishes or fails.
with open('/home/joe/file.txt', 'rb') as f_in, gzip.open('/home/joe/file.txt.gz', 'wb') as f_out:
    shutil.copyfileobj(f_in, f_out)
def download_dataset(url, target_path="data/", keep_download=True, overwrite_download=False,
                     chunk_size=512):
    """Download a dataset file from a URL and hand it to extract_file.

    url: string, location of the dataset to download
    target_path: string, directory the file is downloaded into; it is created
        if missing and is also passed on to extract_file
    keep_download: boolean, keep the downloaded file after extraction; when
        False the file is removed once a fresh download has been extracted
    overwrite_download: boolean, download again even if the file already
        exists; when False an existing file is extracted without downloading
    chunk_size: int, number of bytes requested per streamed chunk

    Raises ValueError when url is empty or None, and requests.HTTPError when
    the server answers with an error status.
    """
    if not url:
        raise ValueError(EMPTY_URL_ERROR)

    filename = get_filename(url)
    file_location = get_file_location(target_path, filename)

    # The download directory is target_path; an empty path means the current
    # directory, which needs no creation (os.makedirs("") would fail).
    if target_path:
        os.makedirs(target_path, exist_ok=True)

    if os.path.exists(file_location) and not overwrite_download:
        print(
            f"File already exists at {file_location}. "
            "Use: 'overwrite_download=True' to overwrite download"
        )
        extract_file(target_path, filename)
        return

    print(f"Downloading file from {url} to {file_location}.")

    # Stream into a sibling ".part" file and move it into place only after the
    # whole body has been written. A failed or interrupted download therefore
    # never leaves a truncated file that a later call would mistake for a
    # complete one, and an existing file survives a failed re-download.
    partial_location = f"{file_location}.part"
    try:
        with requests.get(url, allow_redirects=True, stream=True) as resp:
            # Fail on 4xx/5xx instead of saving the error page as the dataset.
            resp.raise_for_status()
            with open(partial_location, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=chunk_size):  # chunk_size in bytes
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        os.replace(partial_location, file_location)
    finally:
        # After a successful os.replace the partial file is gone and this is
        # a no-op; on any failure it removes the leftover.
        if os.path.exists(partial_location):
            os.remove(partial_location)

    print("Finished downloading.")
    print("Extracting the file now ...")
    extract_file(target_path, filename)

    if not keep_download:
        os.remove(file_location)