##download_backblaze_data.py###################################################
#
# Requirements
#
#    -  wget (`pip install wget`)
#
# How to use:
#
# `python download_backblaze_data.py [parameter]`
#
# Parameters:
#
#    - parameter: either `full` or `min`
#
#    If set to `full` will download the data sets used in Chapter 7 (4 files,
#    ~2.3GB compressed, 12.4GB uncompressed).
#
#    If set to `min` will download only 2019 Q3 (1 file, 574MB compressed,
#    3.1GB uncompressed).
#
###############################################################################

import sys
import wget
import os
import zipfile


# One archive per quarter of 2019 (Q1 through Q4), in chronological order.
DATASETS_FULL = [
    "https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/"
    f"data_Q{quarter}_2019.zip"
    for quarter in (1, 2, 3, 4)
]

# Only the Q3 archive (index 2). Kept as a one-element list so the download
# loop can treat both data-set selections the same way.
DATASETS_MINIMAL = [DATASETS_FULL[2]]

def resolve_mode(args):
    """Return the canonical download mode selected on the command line.

    Args:
        args: Command-line arguments without the script name (that is,
            ``sys.argv[1:]``). Only the first argument is inspected; any
            further arguments are ignored.

    Returns:
        ``"full"`` or ``"min"``. Matching is case-insensitive, and
        ``"minimal"`` is accepted as an alias of ``"min"``.

    Raises:
        ValueError: If no argument was given, or if the first argument is
            not a recognised mode. The message states which of the two
            happened.
    """
    if not args:
        raise ValueError("Parameter missing.")

    mode = args[0].lower()
    if mode == "full":
        return "full"
    if mode in ("min", "minimal"):
        return "min"
    raise ValueError(f"Unknown parameter {args[0]!r}.")


if __name__ == "__main__":

    # Work out which set of archives to fetch; bail out with a usage hint
    # when the argument is absent or not recognised.
    try:
        mode = resolve_mode(sys.argv[1:])
    except ValueError as err:
        print(
            f"{err} Refer to the documentation at the top "
            "of the source code for more information"
        )
        sys.exit(1)

    datasets = DATASETS_FULL if mode == "full" else DATASETS_MINIMAL

    out_dir = "./data/backblaze/"
    # exist_ok avoids the separate "does it exist?" check.
    os.makedirs(out_dir, exist_ok=True)

    for dataset in datasets:
        # The archive name is the last path component of the URL.
        file_name = dataset.split("/")[-1]
        zip_file = os.path.join(out_dir, file_name)

        # Skip the download when the archive is already on disk.
        if not os.path.exists(zip_file):
            print("\nGoing to download ", file_name)
            wget.download(dataset, out=out_dir)
        else:
            print(f"File {file_name} is already downloaded")

        # Extraction always runs, even for archives downloaded previously.
        try:
            with zipfile.ZipFile(zip_file, "r") as zip_ref:
                print(f"Unzipping the {zip_file}...")
                zip_ref.extractall(out_dir)
        except zipfile.BadZipFile:
            # The file exists but cannot be read as a zip archive. Because
            # existing files are never re-downloaded, the user has to
            # remove it before a rerun can succeed.
            print(
                f"{zip_file} is not a valid zip archive. "
                "Delete it and run this script again."
            )
            sys.exit(1)
