Source code for pymodis.downmodis

#!/usr/bin/env python
#  class to download modis data
#
#  (c) Copyright Luca Delucchi 2010-2016
#  (c) Copyright Logan C Byers 2014
#  Authors: Luca Delucchi
#           Logan C Byers
#  Email: luca dot delucchi at fmach dot it
#         loganbyers@ku.edu
#
##################################################################
#
#  This MODIS Python class is licensed under the terms of GNU GPL 2.
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of
#  the License, or (at your option) any later version.
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#  See the GNU General Public License for more details.
#
##################################################################
"""Module to download MODIS HDF files from NASA repository.
It supports both FTP and HTTP repositories

Classes:

* :class:`modisHtmlParser`
* :class:`downModis`

Functions:

* :func:`urljoin`
* :func:`getNewerVersion`
* :func:`str2date`

"""

# python 2 and 3 compatibility
from __future__ import print_function
from builtins import dict

from datetime import date
from datetime import timedelta
import os
import sys
import glob
import logging
import socket
from ftplib import FTP
import ftplib

import requests
# urllib in python 2 and 3
try:
    from future.standard_library import install_aliases
    install_aliases()
except ImportError:
    raise ImportError("Future library not found, please install it")
from urllib.request import urlopen
import urllib.request
import urllib.error
from base64 import b64encode
from html.parser import HTMLParser
import re
import netrc
import warnings
# urlparse in python 2 and 3
try:
    from urlparse import urlparse
    URLPARSE = True
except ImportError:
    try:
        from urllib.parse import urlparse
        URLPARSE = True
    except ImportError:
        URLPARSE = False
        warnings.warn('urlparse not found, it is not possible to use'
                      ' netrc file', ImportError)
global GDAL

try:
    import osgeo.gdal as gdal
    GDAL = True
except ImportError:
    try:
        import gdal
        GDAL = True
    except ImportError:
        GDAL = False
        warnings.warn('Python GDAL library not found, please install'
                      ' it to check data downloaded with pyModis', ImportError)
# setup gdal
if GDAL:
    gdal.UseExceptions()
    gdalDriver = gdal.GetDriverByName('HDF4')
    if not gdalDriver:
        GDAL = False
        warnings.warn("GDAL installation has no support for HDF4, "
                      "please update GDAL", ImportError)


[docs]def urljoin(*args):
    """Joins given arguments into a url. Trailing but not leading slashes are
    stripped for each argument.
    http://stackoverflow.com/a/11326230

    :return: a string
    """

    return "/".join([str(x).rstrip('/') for x in args])


[docs]def getNewerVersion(oldFile, newFile):
    """Check two files to determine which is newer

       :param str oldFile: one of the two similar files
       :param str newFile: one of the two similar files

       :return: the name of newer file
    """
    # get the processing date (YYYYDDDHHMMSS) from the file strings
    if oldFile.split('.')[4] > newFile.split('.')[4]:
        return oldFile
    else:
        return newFile


[docs]def str2date(datestring):
    """Convert to datetime.date object from a string

       :param str datestring string with format (YYYY-MM-DD)
       :return: a datetime.date object representing datestring
    """
    if '-' in datestring:
        stringSplit = datestring.split('-')
    elif '.' in datestring:
        stringSplit = datestring.split('.')
    elif ' ' in datestring:
        stringSplit = datestring.split(' ')
    return date(int(stringSplit[0]), int(stringSplit[1]), int(stringSplit[2]))


[docs]class ModisHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Class to return 302 error"""
[docs]    def http_error_302(self, req, fp, code, msg, headers):
        return urllib.request.HTTPRedirectHandler.http_error_302(self, req, fp,
                                                                 code, msg,
                                                                 headers)


[docs]class modisHtmlParser(HTMLParser):
    """A class to parse HTML

       :param fh: content of http request
    """
    def __init__(self, fh):
        """Function to initialize the object"""
        HTMLParser.__init__(self)
        self.fileids = []
        self.feed(str(fh))

[docs]    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            attrD = dict(attrs)
            self.fileids.append(attrD['href'].replace('/', ''))

[docs]    def get_all(self):
        """Return everything"""
        return self.fileids

[docs]    def get_dates(self):
        """Return a list of directories with date"""
        regex = re.compile('(\d{4})[/.-](\d{2})[/.-](\d{2})$')
        alldata = set([elem for elem in self.fileids if regex.match(elem)])
        return sorted(list(alldata))

[docs]    def get_tiles(self, prod, tiles, jpeg=False):
        """Return a list of files to download

           :param str prod: the code of MODIS product that we are going to
                            analyze
           :param list tiles: the list of tiles to consider
           :param bool jpeg: True to also check for jpeg data
        """
        finalList = []
        for i in self.fileids:
            # distinguish jpg from hdf by where the tileID is within the string
            # jpgs have the tileID at index 3, hdf have tileID at index 2
            name = i.split('.')
            # if product is not in the filename, move to next filename in list
            if not name.count(prod):
                continue
            # if tiles are not specified and the file is not a jpg, add to list
            if not tiles and not (name.count('jpg') or name.count('BROWSE')):
                finalList.append(i)
            # if tiles are specified
            if tiles:
                # if a tileID is at index 3 and jpgs are to be downloaded
                if tiles.count(name[3]) == 1 and jpeg:
                    finalList.append(i)
                # if a tileID is at in index 2, it is known to be HDF
                elif tiles.count(name[2]) == 1:
                    finalList.append(i)
        return finalList


[docs]class downModis:
    """A class to download MODIS data from NASA FTP or HTTP repositories

       :param str destinationFolder: where the files will be stored
       :param str password: the password required by NASA authentication system
       :param str user: the user namerequired by NASA authentication system
       :param str url: the base url from where to download the MODIS data,
                       it can be FTP or HTTP but it has to start with
                       'ftp://' or 'http://' or 'https://'
       :param str path: the directory where the data that you want to
                        download are stored on the FTP server. For HTTP
                        requests, this is the part of the url between the 'url'
                        parameter and the 'product' parameter.
       :param str product: the code of the product to download, the code
                           should be idential to the one of the url
       :param str tiles: a set of tiles to be downloaded, None == all tiles.
                         This can be passed as a string of tileIDs separated
                         by commas, or as a list of individual tileIDs
       :param str today: the day to start downloading; in order to pass a
                         date different from today use the format YYYY-MM-DD
       :param str enddate: the day to end downloading; in order to pass a
                           date use the format YYYY-MM-DD. This day must be
                           before the 'today' parameter. Downloading happens
                           in reverse order (currently)

       :param int delta: timelag i.e. the number of days starting from
                         today backwards. Will be overwritten if
                         'enddate' is specifed during instantiation
       :param bool jpeg: set to True if you want to download the JPG overview
                         file in addition to the HDF
       :param bool debug: set to True if you want to obtain debug information
       :param int timeout: Timeout value for HTTP server (seconds)
       :param bool checkgdal: variable to set the GDAL check
    """

    def __init__(self, destinationFolder, password=None, user=None, token=None,
                 url="https://e4ftl01.cr.usgs.gov", tiles=None, path="MOLT",
                 product="MOD11A1.006", today=None, enddate=None, delta=10,
                 jpg=False, debug=False, timeout=30, checkgdal=True):
        """Function to initialize the object"""

        self.token = None
        self.user = None
        self.password = None
        
        # prepare the base url and set the url type (ftp/http)
        if 'ftp://' in url:
            self.url = url.replace('ftp://', '').rstrip('/')
            self.urltype = 'ftp'
        elif 'http://' in url:
            self.url = url
            self.urltype = 'http'
        elif 'https://' in url:
            self.url = url
            self.urltype = 'http'
        else:
            raise IOError("The url should contain 'ftp://' or 'http://'")

        # token case
        if token: 
            # token for download
            self.token = token
        # user and password case
        elif user and password:
            # user for download
            self.user = user
            # password for download
            self.password = password
        # netrc case
        else:
            if not URLPARSE:
                raise IOError("Please use 'user' and 'password' parameters")
            self.domain = urlparse(self.url).hostname
            try:
                nt = netrc.netrc()
            except:
                raise IOError("Please set 'user' and 'password' parameters netrc file does not exist")
            try:
                account = nt.hosts[self.domain]
            except:
                try:
                    account = nt.hosts['urs.earthdata.nasa.gov']
                except:
                    raise IOError("Please set 'user' and 'password' parameters netrc file does not contain parameter for NASA url")
            # user for download
            self.user = account[0]
            # password for download
            self.password = account[2]
            # token for download from password
            self.token = self.password if self.user == "token" else None
            
        if not self.user and not self.password and not self.token:
            raise IOError("You must provide either a token or a user and password")
            
        # set the http header
        if self.token:
            self.http_header = {'Authorization': f"Bearer {self.token}"}
        else:
            self.userpwd = "{us}:{pw}".format(us=self.user, pw=self.password)
            userAndPass = b64encode(str.encode(self.userpwd)).decode("ascii")
            self.http_header = {'Authorization': 'Basic %s' %  userAndPass}

        cookieprocessor = urllib.request.HTTPCookieProcessor()
        opener = urllib.request.build_opener(ModisHTTPRedirectHandler,
                                             cookieprocessor)
        urllib.request.install_opener(opener)
        # the product (product_code.004 or product_cod.005)
        self.product = product
        self.product_code = product.split('.')[0]
        # url directory where data are located
        self.path = urljoin(path, self.product)
        # tiles to downloads
        if isinstance(tiles, str):
            self.tiles = tiles.split(',')
        else:  # tiles are list, tuple, or None
            self.tiles = tiles
        # set destination folder
        if not os.path.isdir(destinationFolder):
            os.makedirs(destinationFolder)
            self.writeFilePath = destinationFolder
        elif os.access(destinationFolder, os.W_OK):
            self.writeFilePath = destinationFolder
        else:
            try:
                os.mkdir(destinationFolder)
                self.writeFilePath = destinationFolder
            except:
                raise Exception("Folder to store downloaded files does not "
                                "exist or is not writeable")
        # return the name of product
        if len(self.path.split('/')) == 2:
            self.product = self.path.split('/')[1]
        elif len(self.path.split('/')) == 3:
            self.product = self.path.split('/')[2]
        # write a file with the name of file to be downloaded
        self.filelist = open(os.path.join(self.writeFilePath,
                                          'listfile{pro}.txt'.format(pro=self.product)),
                             'w')
        # set if to download jpgs
        self.jpeg = jpg
        # today, or the last day in the download series chronologically
        self.today = today
        # chronologically the first day in the download series
        self.enday = enddate
        # default number of days to consider if enddate not specified
        self.delta = delta
        # status of tile download
        self.status = True
        # for debug, you can download only xml files
        self.debug = debug
        # for logging
        log_filename = os.path.join(self.writeFilePath,
                                    'modis{pro}.log'.format(pro=self.product))
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(filename=log_filename, level=logging.DEBUG,
                            format=log_format)
        logging.captureWarnings(True)
        # global connection attempt counter
        self.nconnection = 0
        # timeout for HTTP connection before failing (seconds)
        self.timeout = timeout
        # files within the directory where data will be saved
        self.fileInPath = []
        for f in os.listdir(self.writeFilePath):
            if os.path.isfile(os.path.join(self.writeFilePath, f)):
                self.fileInPath.append(f)
        global GDAL
        if not GDAL and checkgdal:
            logging.warning("WARNING: Python GDAL library not found")
        elif GDAL and not checkgdal:
            GDAL = False
        self.dirData = []
        # set today and enday dates
        self._getToday()

[docs]    def removeEmptyFiles(self):
        """Function to remove files in the download directory that have
           filesize equal to 0
        """
        year = str(self.today.year)
        prefix = self.product.split('.')[0]
        files = glob.glob1(self.writeFilePath, '%s.A%s*' % (prefix, year))
        for f in files:
            fil = os.path.join(self.writeFilePath, f)
            if os.path.getsize(fil) == 0:
                os.remove(fil)

[docs]    def connect(self, ncon=20):
        """Connect to the server and fill the dirData variable

           :param int ncon: maximum number of attempts to connect to the HTTP
                            server before failing
        """
        if self.urltype == 'ftp':
            self._connectFTP(ncon)
        elif self.urltype == 'http':
            self._connectHTTP(ncon)
        if len(self.dirData) == 0:
            raise Exception("There are some troubles with the server. "
                            "The directory seems to be empty")

    def _connectHTTP(self, ncon=20):
        """Connect to HTTP server, create a list of directories for all days

           :param int ncon: maximum number of attempts to connect to the HTTP
                            server before failing. If ncon < 0, connection
                            attempts are unlimited in number
        """
        self.nconnection += 1
        try:
            url = urljoin(self.url, self.path)
            try:
                req = urllib.request.Request(url, headers=self.http_header)
                http = urllib.request.urlopen(req)
                self.dirData = modisHtmlParser(http.read()).get_dates()
            except Exception as e:
                logging.error('Error in connection. Code {code}, '
                              'reason {re}'.format(code=e.code, re=e.reason))
                http = urlopen(url, timeout=self.timeout)
                self.dirData = modisHtmlParser(http.read()).get_dates()
            self.dirData.reverse()
        except Exception as e:
            try:
                logging.error('Error in connection. Code {code}, '
                              'reason {re}'.format(code=e.code, re=e.reason))
            except:
                logging.error('Error {er}'.format(er=e))
            if self.nconnection <= ncon or ncon < 0:
                self._connectHTTP()

    def _connectFTP(self, ncon=20):
        """Set connection to ftp server, move to path where data are stored,
           and create a list of directories for all days

           :param int ncon: maximum number of attempts to connect to the FTP
                            server before failing.

        """
        if not self.user and not self.password:
            raise IOError("You must provide a user and password to connect.")
        self.nconnection += 1
        try:
            # connect to ftp server
            self.ftp = FTP(self.url)
            self.ftp.login(self.user, self.password)
            # enter in directory
            self.ftp.cwd(self.path)
            self.dirData = []
            # return data inside directory
            self.ftp.dir(self.dirData.append)
            # reverse order of data for have first the nearest to today
            self.dirData.reverse()
            # ensure dirData contains only directories, remove all references to files
            self.dirData = [elem.split()[-1] for elem in self.dirData if elem.startswith("d")]
            if self.debug:
                logging.debug("Open connection {url}".format(url=self.url))
        except (EOFError, ftplib.error_perm) as e:
            logging.error('Error in connection: {err}'.format(err=e))
            if self.nconnection <= ncon:
                self._connectFTP()

[docs]    def closeFTP(self):
        """Close ftp connection and close the file list document"""
        self.ftp.quit()
        self.closeFilelist()
        if self.debug:
            logging.debug("Close connection {url}".format(url=self.url))

[docs]    def closeFilelist(self):
        """Function to close the file list of where the files are downloaded"""
        self.filelist.close()

[docs]    def setDirectoryIn(self, day):
        """Enter into the file directory of a specified day

           :param str day: a string representing a day in format YYYY.MM.DD
        """
        try:
            self.ftp.cwd(day)
        except (ftplib.error_reply, socket.error) as e:
            logging.error("Error {err} entering in directory "
                          "{name}".format(err=e, name=day))
            self.setDirectoryIn(day)

[docs]    def setDirectoryOver(self):
        """Move up within the file directory"""
        try:
            self.ftp.cwd('..')
        except (ftplib.error_reply, socket.error) as e:
            logging.error("Error {err} when trying to come back".format(err=e))
            self.setDirectoryOver()

    def _getToday(self):
        """Set the dates for the start and end of downloading"""
        if self.today is None:
            # set today variable from datetime.date method
            self.today = date.today()
        elif isinstance(self.today, str):
            # set today variable from string data passed by user
            self.today = str2date(self.today)
        # set enday variable to data passed by user
        if isinstance(self.enday, str):
            self.enday = str2date(self.enday)
        # set delta
        if self.today and self.enday:
            if self.today < self.enday:
                self.today, self.enday = self.enday, self.today
            delta = self.today - self.enday
            self.delta = abs(delta.days) + 1

[docs]    def getListDays(self):
        """Return a list of all selected days"""
        today_s = self.today.strftime("%Y.%m.%d")
        # dirData is reverse sorted
        for i, d in enumerate(self.dirData):
            if d <= today_s:
                today_index = i
                break
#            else:
#                logging.error("No data available for requested days")
#                import sys
#                sys.exit()
        days = self.dirData[today_index:][:self.delta]
        # this is useful for 8/16 days data, delta could download more images
        # that you want
        if self.enday is not None:
            enday_s = self.enday.strftime("%Y.%m.%d")
            delta = 0
            # make a full cycle from the last index and find
            # it make a for cicle from the last value and find the internal
            # delta to remove file outside temporaly range
            for i in range(0, len(days)):
                if days[i] < enday_s:
                    break
                else:
                    delta = delta + 1
            # remove days outside new delta
            days = days[:delta]
        return days

[docs]    def getAllDays(self):
        """Return a list of all days"""
        return self.dirData

[docs]    def getFilesList(self, day=None):
        """Returns a list of files to download. HDF and XML files are
           downloaded by default. JPG files will be downloaded if
           self.jpeg == True.

           :param str day: the date of data in format YYYY.MM.DD

           :return: a list of files to download for the day
        """
        if self.urltype == 'http':
            return self._getFilesListHTTP(day)
        elif self.urltype == 'ftp':
            return self._getFilesListFTP()

    def _getFilesListHTTP(self, day):
        """Returns a list of files to download from http server, which will
           be HDF and XML files, and optionally JPG files if specified by
           self.jpeg

           :param str day: the date of data in format YYYY.MM.DD
        """
        # return the files list inside the directory of each day
        try:
            url = urljoin(self.url, self.path, day)
            if self.debug:
                logging.debug("The url is: {url}".format(url=url))
            try:
                http = modisHtmlParser(requests.get(url,
                                                    timeout=self.timeout).content)
            except:
                http = modisHtmlParser(urlopen(url,
                                               timeout=self.timeout).read())
            # download JPG files also
            if self.jpeg:
                # if tiles not specified, download all files
                if not self.tiles:
                    finalList = http.get_all()
                # if tiles specified, download all files with jpegs
                else:
                    finalList = http.get_tiles(self.product_code,
                                               self.tiles, jpeg=True)
            # if JPG files should not be downloaded, get only HDF and XML
            else:
                finalList = http.get_tiles(self.product_code, self.tiles)
            if self.debug:
                logging.debug("The number of file to download is: "
                              "{num}".format(num=len(finalList)))

            return finalList
        except (socket.error) as e:
            logging.error("Error {err} when try to receive list of "
                          "files".format(err=e))
            self._getFilesListHTTP(day)

    def _getFilesListFTP(self):
        """Create a list of files to download from FTP server, it is possible
           choose to download also the JPG overview files or only the HDF files
        """
        def cicle_file(jpeg=False):
            """Check the type of file"""
            finalList = []
            for i in self.listfiles:
                name = i.split('.')
                # distinguish jpeg files from hdf files by the number of index
                # where find the tile index
                if not self.tiles and not (name.count('jpg') or
                                           name.count('BROWSE')):
                    finalList.append(i)
                # is a jpeg of tiles number
                if self.tiles:
                    if self.tiles.count(name[3]) == 1 and jpeg:
                        finalList.append(i)
                # is a hdf of tiles number
                    elif self.tiles.count(name[2]) == 1:
                        finalList.append(i)
            return finalList

        # return the file's list inside the directory of each day
        try:
            self.listfiles = self.ftp.nlst()
            # download also jpeg
            if self.jpeg:
                # finallist is ugual to all file with jpeg file
                if not self.tiles:
                    finalList = self.listfiles
                # finallist is ugual to tiles file with jpeg file
                else:
                    finalList = cicle_file(jpeg=True)
            # not download jpeg
            else:
                finalList = cicle_file()
            if self.debug:
                logging.debug("The number of file to download is: "
                              "{num}".format(num=len(finalList)))
            return finalList
        except (ftplib.error_reply, socket.error) as e:
            logging.error("Error {err} when trying to receive list of "
                          "files".format(err=e))
            self._getFilesListFTP()

[docs]    def checkDataExist(self, listNewFile, move=False):
        """Check if a file already exists in the local download directory

           :param list listNewFile: list of all files, returned by getFilesList
                                    function
           :param bool move: it is useful to know if a function is called from
                             download or move function
           :return: list of files to download
        """
        # different return if this method is used from downloadsAllDay() or
        # moveFile()
        if not listNewFile and not self.fileInPath:
            logging.error("checkDataExist both lists are empty")
        elif not listNewFile:
            listNewFile = list()
        elif not self.fileInPath:
            self.fileInPath = list()
        if not move:
            listOfDifferent = list(set(listNewFile) - set(self.fileInPath))
        elif move:
            listOfDifferent = list(set(self.fileInPath) - set(listNewFile))
        return listOfDifferent

[docs]    def checkFile(self, filHdf):
        """Check by using GDAL to be sure that the download went ok

           :param str filHdf: name of the HDF file to check

           :return: 0 if file is correct, 1 for error
        """
        try:
            gdal.Open(filHdf)
            return 0
        except (RuntimeError) as e:
            logging.error(e)
            return 1

[docs]    def downloadFile(self, filDown, filHdf, day):
        """Download a single file

           :param str filDown: name of the file to download
           :param str filHdf: name of the file to write to
           :param str day: the day in format YYYY.MM.DD
        """
        if self.urltype == 'http':
            self._downloadFileHTTP(filDown, filHdf, day)
        elif self.urltype == 'ftp':
            self._downloadFileFTP(filDown, filHdf)

    def _downloadFileHTTP(self, filDown, filHdf, day):
        """Download a single file from the http server

           :param str filDown: name of the file to download
           :param str filHdf: name of the file to write to
           :param str day: the day in format YYYY.MM.DD
        """
        filSave = open(filHdf, "wb")
        url = urljoin(self.url, self.path, day, filDown)
        orig_size = None
        try:  # download and write the file
            req = urllib.request.Request(url, headers=self.http_header)
            http = urllib.request.urlopen(req)
            orig_size = http.headers['Content-Length']
            filSave.write(http.read())
        # if local file has an error, try to download the file again
        except Exception as e:
            logging.warning("Tried to downlaod with urllib but got this "
                            "error {co}, reason {re}".format(co=e.code,
                                                             re=e.reason))
            try:
                http = requests.get(url, timeout=self.timeout)
                orig_size = http.headers['Content-Length']
                filSave.write(http.content)
            except Exception as e:
                logging.warning("Tried to downlaod with requests but got this "
                                "error {co}, reason {re}".format(co=e.code,
                                                                 re=e.reason))
                logging.error("Cannot download {name}. "
                              "Retrying...".format(name=filDown))
                filSave.close()
                os.remove(filSave.name)
                import time
                time.sleep(5)
                self._downloadFileHTTP(filDown, filHdf, day)
        filSave.close()
        transf_size = os.path.getsize(filSave.name)
        if not orig_size:
            self.filelist.write("{name}\n".format(name=filDown))
            self.filelist.flush()
            if self.debug:
                logging.debug("File {name} downloaded but not "
                              "check the size".format(name=filDown))
            return 0
        if int(orig_size) == int(transf_size):
            # if no xml file, delete the HDF and redownload
            if filHdf.find('.xml') == -1:
                test = False
                if GDAL:
                    test = self.checkFile(filHdf)
                if test:
                    os.remove(filSave.name)
                    self._downloadFileHTTP(filDown, filHdf, day)
                else:
                    self.filelist.write("{name}\n".format(name=filDown))
                    self.filelist.flush()
                    if self.debug:
                        logging.debug("File {name} downloaded "
                                      "correctly".format(name=filDown))
                    return 0
            else:  # xml exists
                self.filelist.write("{name}\n".format(name=filDown))
                self.filelist.flush()
                if self.debug:
                    logging.debug("File {name} downloaded "
                                  "correctly".format(name=filDown))
                return 0
        # if filesizes are different, delete and try again
        else:
            logging.warning("Different size for file {name} - original data: "
                            "{orig}, downloaded: {down}".format(name=filDown,
                                                                orig=orig_size,
                                                                down=transf_size))
            os.remove(filSave.name)
            self._downloadFileHTTP(filDown, filHdf, day)

    def _downloadFileFTP(self, filDown, filHdf):
        """Download a single file from ftp server

           :param str filDown: name of the file to download
           :param str filHdf: name of the file to write to
        """
        filSave = open(filHdf, "wb")
        try:  # transfer file from ftp
            self.ftp.retrbinary("RETR " + filDown, filSave.write)
            self.filelist.write("{name}\n".format(name=filDown))
            self.filelist.flush()
            if self.debug:
                logging.debug("File {name} downloaded".format(name=filDown))
        # if error during download process, try to redownload the file
        except (ftplib.error_reply, socket.error, ftplib.error_temp,
                EOFError) as e:
            logging.error("Cannot download {name}, the error was '{err}'. "
                          "Retrying...".format(name=filDown, err=e))
            filSave.close()
            os.remove(filSave.name)
            try:
                self.ftp.pwd()
            except (ftplib.error_temp, EOFError) as e:
                self._connectFTP()
            self._downloadFileFTP(filDown, filHdf)
        filSave.close()
        orig_size = self.ftp.size(filDown)
        transf_size = os.path.getsize(filSave.name)
        if orig_size == transf_size:
            return 0
        else:
            logging.warning("Different size for file {name} - original data: "
                            "{orig}, downloaded: {down}".format(name=filDown,
                                                                orig=orig_size,
                                                                down=transf_size))
            os.remove(filSave.name)
            self._downloadFileFTP(filDown, filHdf)

[docs]    def dayDownload(self, day, listFilesDown):
        """Downloads tiles for the selected day

           :param str day: the day in format YYYY.MM.DD
           :param list listFilesDown: list of the files to download, returned
                                      by checkDataExist function
        """
        # for each file in files' list
        for i in listFilesDown:
            fileSplit = i.split('.')
            filePrefix = "{a}.{b}.{c}.{d}".format(a=fileSplit[0],
                                                  b=fileSplit[1],
                                                  c=fileSplit[2],
                                                  d=fileSplit[3])

            # check if this file already exists in the save directory
            oldFile = glob.glob1(self.writeFilePath, filePrefix + "*"
                                 + fileSplit[-1])
            numFiles = len(oldFile)
            # if it doesn't exist
            if numFiles == 0:
                file_hdf = os.path.join(self.writeFilePath, i)
            # if one does exist
            elif numFiles == 1:
                # check the version of file, delete local file if it is older
                fileDown = getNewerVersion(oldFile[0], i)
                if fileDown != oldFile[0]:
                    os.remove(os.path.join(self.writeFilePath, oldFile[0]))
                    file_hdf = os.path.join(self.writeFilePath, fileDown)
            elif numFiles > 1:
                logging.error("There are to many files for "
                              "{name}".format(name=i))
            if numFiles == 0 or (numFiles == 1 and fileDown != oldFile[0]):
                self.downloadFile(i, file_hdf, day)

[docs]    def downloadsAllDay(self, clean=False, allDays=False):
        """Download all requested days

           :param bool clean: if True remove the empty files, they could have
                              some problems in the previous download
           :param bool allDays: download all passable days
        """
        if clean:
            self.removeEmptyFiles()
        # get the days to download
        if allDays:
            days = self.getAllDays()
        else:
            days = self.getListDays()
        # log the days to download
        if self.debug:
            logging.debug("The number of days to download is: "
                          "{num}".format(num=len(days)))
        # download the data
        if self.urltype == 'http':
            self._downloadAllDaysHTTP(days)
        elif self.urltype == 'ftp':
            self._downloadAllDaysFTP(days)

    def _downloadAllDaysHTTP(self, days):
        """Downloads all the tiles considered from HTTP server

           :param list days: the list of days to download
        """
        # for each day
        for day in days:
            # obtain list of all files
            listAllFiles = self.getFilesList(day)
            # filter files based on local files in save directory
            listFilesDown = self.checkDataExist(listAllFiles)
            # download files for a day
            self.dayDownload(day, listFilesDown)
        self.closeFilelist()
        if self.debug:
            logging.debug("Download terminated")
        return 0

    def _downloadAllDaysFTP(self, days):
        """Downloads all the tiles considered from FTP server

           :param list days: the list of days to download
        """
        # for each day
        for day in days:
            # enter in the directory of day
            self.setDirectoryIn(day)
            # obtain list of all files
            listAllFiles = self.getFilesList()
            # filter files based on local files in save directory
            listFilesDown = self.checkDataExist(listAllFiles)
            # download files for a day
            self.dayDownload(day, listFilesDown)
            self.setDirectoryOver()
        self.closeFTP()
        if self.debug:
            logging.debug("Download terminated")
        return 0

[docs]    def debugLog(self):
        """Function to create the debug file

        :return: a Logger object to use to write debug info
        """
        # create logger
        logger = logging.getLogger("PythonLibModis debug")
        logger.setLevel(logging.DEBUG)
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        # create formatter
        formatter = logging.Formatter("%(asctime)s - %(name)s - "
                                      "%(levelname)s - %(message)s")
        # add formatter to console handler
        ch.setFormatter(formatter)
        # add console handler to logger
        logger.addHandler(ch)
        return logger

[docs]    def debugDays(self):
        """This function is useful to debug the number of days"""
        logger = self.debugLog()
        days = self.getListDays()
        # if length of list of days and the delta of days are different
        if len(days) != self.delta:
            # for each day
            for i in range(1, self.delta + 1):
                # calculate the current day using datetime.timedelta
                delta = timedelta(days=i)
                day = self.today - delta
                day = day.strftime("%Y.%m.%d")
                # check if day is in the days list
                if day not in days:
                    logger.critical("This day {day} is not present on "
                                    "list".format(day=day))
        # the length of list of days and delta are equal
        else:
            logger.info("debugDays() : getListDays() and self.delta are same "
                        "length")

[docs]    def debugMaps(self):
        """ Prints the files to download to the debug stream"""
        logger = self.debugLog()
        days = self.getListDays()
        for day in days:
            listAllFiles = self.getFilesList(day)
            string = day + ": " + str(len(listAllFiles)) + "\n"
            logger.debug(string)