#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2017 ScyllaDB
#

#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.

import os
import re
from scylla_util import *
import subprocess
import argparse
import yaml
import logging
import sys
import scylla_blocktune as blocktune


# Regular expression helpers
#
# Comment guard.  The zero-width negative lookahead is evaluated at the very
# start of the line and rejects any line whose first non-blank character is
# '#'; only then is the leading whitespace consumed.  The lookahead has to come
# *before* `\s*`: with the reverse order (`^\s*(?!#)`) the regex engine can
# backtrack `\s*` by one character so the lookahead sees a space instead of the
# '#', and indented comment lines are accepted as active configuration.
_nocomment = r"^(?!\s*#)\s*"
# Separator between an option name and its value: either a (possibly empty)
# run of whitespace or a single '='.  Non-capturing group.
_scyllaeq = r"(?:\s*|=)"
# "--cpuset <list>"; the list of CPU ids / ranges (e.g. "0-3,8") is captured
# in the named group "cpuset".
_cpuset = r"(?:\s*--cpuset" + _scyllaeq + r"(?P<cpuset>\d+(?:[-,]\d+)*))"
# "--smp <n>"; the shard count is captured in the named group "smp".
_smp = r"(?:\s*--smp" + _scyllaeq + r"(?P<smp>\d+))"


def _reopt(s):
    """Return the regex fragment `s` followed by the `?` quantifier.

    Used to make a (grouped) sub-pattern optional when composing larger
    patterns.
    """
    return "".join((s, "?"))


def is_developer_mode():
    """Tell whether developer mode is enabled in scylla.d/dev-mode.conf.

    Returns True when at least one non-comment line of
    ``<etcdir>/scylla.d/dev-mode.conf`` contains ``developer-mode`` followed
    by ``1`` or ``true`` (separated by whitespace or ``=``), False otherwise.

    Raises whatever ``open`` raises (e.g. ``FileNotFoundError``) when the
    configuration file cannot be read.
    """
    pattern = re.compile(_nocomment + r".*developer-mode" + _scyllaeq + "(1|true)")
    # The context manager closes the file deterministically; any() stops at
    # the first matching line instead of collecting every match.
    with open(etcdir() + "/scylla.d/dev-mode.conf", "r") as f:
        return any(pattern.match(line) for line in f)

class scylla_cpuinfo:
    """Class containing information about how Scylla sees CPUs in this machine.

    Information that can be probed includes on which hyperthreads Scylla is
    configured to run, how many total threads exist in the system, etc.

    The data is gathered once, at construction time, from
    ``<etcdir>/scylla.d/cpuset.conf`` and ``/proc/cpuinfo`` and is stored in
    ``self._cpu_data`` under the keys ``"cpuset"``, ``"smp"`` and ``"system"``.
    """

    @staticmethod
    def _expand_cpuset(spec):
        """Expand a cpuset string such as ``"0-3,8"`` into a set of ints."""
        cpus = set()
        for part in spec.split(","):
            ends = [int(x) for x in part.split("-")]
            # A single id ("8") yields ends == [8], i.e. the range 8..8.
            cpus.update(range(ends[0], ends[-1] + 1))
        return cpus

    @staticmethod
    def _parse_cpuinfo(lines):
        """Parse /proc/cpuinfo-style lines into ``{processor: {key: value}}``.

        Every ``processor : N`` line starts a new entry keyed by the integer
        N; the following ``key : value`` lines are stored (stripped) in that
        entry.  Lines without a ``:`` separator (blank lines included) and
        lines appearing before the first ``processor`` entry are ignored.
        """
        results = {}
        cur_proc = None
        for line in lines:
            # partition() splits on the first ':' only, so values that
            # themselves contain a colon are kept intact.
            key, sep, value = line.partition(":")
            if not sep:
                continue
            key = key.strip()
            value = value.strip()
            if key == "processor":
                cur_proc = int(value)
                results[cur_proc] = {}
            elif cur_proc is None:
                # Global line preceding any processor entry; nothing to
                # attach it to.
                continue
            results[cur_proc][key] = value
        return results

    def __parse_cpuset(self):
        """Read the CPUSET= setting from cpuset.conf into ``self._cpu_data``.

        When several non-comment CPUSET lines match, the last one wins.
        ``cpuset`` becomes a set of CPU ids (or None) and ``smp`` an int (or
        None).
        """
        pattern = re.compile(_nocomment + r"CPUSET=\s*\"" + _reopt(_cpuset) + _reopt(_smp) + r'\s*"')
        with open(etcdir() + "/scylla.d/cpuset.conf", "r") as f:
            matches = [m for m in map(pattern.match, f) if m]
        if matches:
            # if more than one, use last
            d = matches[-1].groupdict()
        else:
            d = {"cpuset": None, "smp": None}
        if d["cpuset"]:
            d["cpuset"] = self._expand_cpuset(d["cpuset"])
        if d["smp"]:
            d["smp"] = int(d["smp"])
        self._cpu_data = d

    def __system_cpus(self):
        """Return the parsed contents of /proc/cpuinfo, keyed by processor id."""
        with open("/proc/cpuinfo", "r") as f:
            return self._parse_cpuinfo(f)

    def __init__(self):
        self.__parse_cpuset()
        self._cpu_data["system"] = self.__system_cpus()

    def system_cpuinfo(self):
        """Returns parsed information about CPUs in the system"""
        return self._cpu_data["system"]

    def system_nr_threads(self):
        """Returns the number of threads available in the system"""
        return len(self._cpu_data["system"])

    def system_nr_cores(self):
        """Returns the number of cores available in the system.

        A core is identified by the pair (physical id, core id): core ids are
        only unique within one physical package, so counting core ids alone
        would under-report on multi-socket machines.  Raises KeyError if an
        entry has no 'core id' field.
        """
        return len({(cpu.get('physical id'), cpu['core id'])
                    for cpu in self._cpu_data["system"].values()})

    def cpuset(self):
        """Returns the current cpuset Scylla is configured to use. Returns None if no constraints exist"""
        return self._cpu_data["cpuset"]

    def smp(self):
        """Returns the explicit smp configuration for Scylla, returns None if no constraints exist"""
        return self._cpu_data["smp"]

    def nr_shards(self):
        """How many shards will Scylla use in this machine.

        An explicit smp setting takes precedence, then the size of the
        configured cpuset, and finally the number of threads in the system.
        """
        if self._cpu_data["smp"]:
            return self._cpu_data["smp"]
        elif self._cpu_data["cpuset"]:
            return len(self._cpu_data["cpuset"])
        else:
            return len(self._cpu_data["system"])

def run_iotune():
    """Validate Scylla's storage directories and benchmark them with iotune.

    The directories are taken from ``scylla.yaml`` (looked up in
    ``$SCYLLA_CONF`` when set, otherwise in ``<etcdir>/scylla``):
    ``data_file_directories`` plus the commitlog, hints, view_hints and
    saved_caches directories.  Entries missing from the configuration fall
    back to sub-directories of ``workdir`` (or of ``datadir()`` when no
    workdir is configured).

    Every directory must exist, be a directory and have at least 10 GB
    available; it is then tuned through ``blocktune.tune_fs`` and passed to
    iotune as an evaluation directory.  iotune writes its results to
    ``<etcdir>/scylla.d/io.conf`` and ``<etcdir>/scylla.d/io_properties.yaml``.

    Reads the module-level ``cpudata`` object (assigned in the ``__main__``
    section of this script) to forward the cpuset/smp restriction to iotune.

    Exits the process with status 1 when a directory fails validation or
    when iotune itself fails.
    """
    conf_dir = os.environ.get("SCYLLA_CONF", etcdir() + "/scylla")
    with open(os.path.join(conf_dir, "scylla.yaml")) as conf_file:
        # safe_load() returns None for an empty document; treat that as an
        # empty configuration instead of crashing on cfg.get().
        cfg = yaml.safe_load(conf_file) or {}
    default_path = cfg.get('workdir') or datadir()

    # A missing or null entry falls back to the default data directory.
    data_dirs = list(cfg.get("data_file_directories") or [os.path.join(default_path, 'data')])

    for t in ["commitlog", "hints", "view_hints", "saved_caches"]:
        configured = cfg.get("%s_directory" % t)
        fallback = os.path.join(default_path, t)
        if configured:
            data_dirs.append(configured)
        elif os.path.isdir(fallback):
            # Not configured explicitly: only evaluate the default location
            # when it actually exists.
            data_dirs.append(fallback)

    min_avail_bytes = 10000000000
    iotune_args = []
    for data_dir in data_dirs:
        if not os.path.exists(data_dir):
            logging.error("%s was not found. Please check the configuration and run scylla_io_setup again.\n", data_dir)
            sys.exit(1)
        if not os.path.isdir(data_dir):
            logging.error("%s is not a directory. Please check the configuration and run scylla_io_setup again.\n", data_dir)
            sys.exit(1)
        st = os.statvfs(data_dir)
        avail = st.f_bavail * st.f_frsize
        if avail < min_avail_bytes:
            logging.error("Filesystem at %s has only %d bytes available; that is less than the recommended 10 GB. Please free up space and run scylla_io_setup again.\n", data_dir, avail)
            sys.exit(1)
        blocktune.tune_fs(data_dir, '2')
        iotune_args += ["--evaluation-directory", data_dir]

    # An explicit cpuset takes precedence over a plain shard count.
    if cpudata.cpuset():
        # Sorted so the generated command line is deterministic.
        iotune_args += ["--cpuset", ",".join(map(str, sorted(cpudata.cpuset())))]
    elif cpudata.smp():
        iotune_args += ["--smp", str(cpudata.smp())]

    try:
        subprocess.check_call([bindir() + "/iotune",
                               "--format", "envfile",
                               "--options-file", etcdir() + "/scylla.d/io.conf",
                               "--properties-file", etcdir() + "/scylla.d/io_properties.yaml"] + iotune_args)
    except Exception as e:
        # Any failure to run iotune (non-zero exit status, missing binary,
        # ...) is reported the same way and is fatal for the setup.
        logging.error(e)
        logging.error("%s did not pass validation tests, it may not be on XFS and/or has limited disk space.\n"
                      "This is a non-supported setup, and performance is expected to be very bad.\n"
                      "For better performance, placing your data on XFS-formatted directories is required.\n"
                      "To override this error, enable developer mode as follow:\n"
                      "sudo %s/scylla_dev_mode_setup --developer-mode 1", data_dirs, scriptsdir())
        sys.exit(1)

def _write_io_properties(disk_properties):
    """Write precomputed disk properties and point Seastar at them.

    Dumps ``disk_properties`` as the single entry of the ``disks`` list in
    ``<etcdir>/scylla.d/io_properties.yaml`` and writes a ``SEASTAR_IO`` line
    referencing that file to ``<etcdir>/scylla.d/io.conf``.  Both files are
    closed (and therefore flushed) before returning.
    """
    properties_path = etcdir() + "/scylla.d/io_properties.yaml"
    with open(properties_path, "w") as properties_file:
        yaml.dump({"disks": [disk_properties]}, properties_file, default_flow_style=False)
    with open(etcdir() + "/scylla.d/io.conf", "w") as ioconf:
        ioconf.write("SEASTAR_IO=\"--io-properties-file={}\"\n".format(properties_path))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='IO Setup script for Scylla.')
    parser.add_argument('--ami', dest='ami', action='store_true',
                        help='configure AWS AMI')
    args = parser.parse_args()

    # Module-level on purpose: run_iotune() reads `cpudata`.
    cpudata = scylla_cpuinfo()
    # In developer mode no I/O configuration is generated at all.
    if not is_developer_mode():
        if args.ami:
            # AWS: use pre-measured figures for the known instance types
            # instead of running iotune.
            idata = aws_instance()

            if not idata.is_supported_instance_class():
                logging.error('{} is not supported instance type, run "scylla_io_setup" again without --ami option.'.format(idata.instance()))
                sys.exit(1)
            disk_properties = {}
            disk_properties["mountpoint"] = datadir()
            nr_disks = len(idata.ephemeral_disks())
            ## both i3 and i2 can run with 1 I/O Queue per shard
            if idata.instance() == "i3.large":
                disk_properties["read_iops"] = 111000
                disk_properties["read_bandwidth"] = 653925080
                disk_properties["write_iops"] = 36800
                disk_properties["write_bandwidth"] = 215066473
            elif idata.instance() == "i3.xlarge":
                disk_properties["read_iops"] = 200800
                disk_properties["read_bandwidth"] = 1185106376
                disk_properties["write_iops"] = 53180
                disk_properties["write_bandwidth"] = 423621267
            elif idata.instance_class() == "i3":
                disk_properties["read_iops"] = 411200 * nr_disks
                disk_properties["read_bandwidth"] = 2015342735 * nr_disks
                disk_properties["write_iops"] = 181500 * nr_disks
                disk_properties["write_bandwidth"] = 808775652 * nr_disks
            elif idata.instance_class() == "i3en":
                if idata.instance() == "i3en.large":
                    disk_properties["read_iops"] = 43315
                    disk_properties["read_bandwidth"] = 330301440
                    disk_properties["write_iops"] = 33177
                    disk_properties["write_bandwidth"] = 165675008
                # Both names must be i3en types: this branch is only reached
                # when the instance class is "i3en".
                elif idata.instance() in ("i3en.xlarge", "i3en.2xlarge"):
                    disk_properties["read_iops"] = 84480 * nr_disks
                    disk_properties["read_bandwidth"] = 666894336 * nr_disks
                    disk_properties["write_iops"] = 66969 * nr_disks
                    disk_properties["write_bandwidth"] = 333447168 * nr_disks
                else:
                    disk_properties["read_iops"] = 257024 * nr_disks
                    disk_properties["read_bandwidth"] = 2043674624 * nr_disks
                    disk_properties["write_iops"] = 174080 * nr_disks
                    disk_properties["write_bandwidth"] = 1024458752 * nr_disks
            elif idata.instance_class() == "i2":
                disk_properties["read_iops"] = 64000 * nr_disks
                disk_properties["read_bandwidth"] = 507338935 * nr_disks
                disk_properties["write_iops"] = 57100 * nr_disks
                disk_properties["write_bandwidth"] = 483141731 * nr_disks
            _write_io_properties(disk_properties)
        elif gcp_instance().is_gce_instance():
            idata = gcp_instance()

            if idata.is_recommended_instance():
                disk_properties = {}
                disk_properties["mountpoint"] = datadir()
                # Normalise to int so the numeric range checks and the
                # multiplications below behave the same however the count is
                # represented by gcp_instance.
                nr_disks = int(idata.nvmeDiskCount)
                # below is based on https://cloud.google.com/compute/docs/disks/local-ssd#performance
                # and https://cloud.google.com/compute/docs/disks/local-ssd#nvme
                # note that scylla iotune might measure more, this is GCP recommended
                mbs = 1024 * 1024
                # Chained comparisons; bitwise `&` must not be used here, it
                # binds tighter than the comparison operators.
                if 1 <= nr_disks < 4:
                    disk_properties["read_iops"] = 170000 * nr_disks
                    disk_properties["read_bandwidth"] = 660 * mbs * nr_disks
                    disk_properties["write_iops"] = 90000 * nr_disks
                    disk_properties["write_bandwidth"] = 350 * mbs * nr_disks
                elif 4 <= nr_disks <= 8:
                    disk_properties["read_iops"] = 680000
                    disk_properties["read_bandwidth"] = 2650 * mbs
                    disk_properties["write_iops"] = 360000
                    disk_properties["write_bandwidth"] = 1400 * mbs
                elif nr_disks == 16:
                    disk_properties["read_iops"] = 1600000
                    disk_properties["read_bandwidth"] = 4521251328
                    #below is google, above is our measured
                    #disk_properties["read_bandwidth"] = 6240 * mbs
                    disk_properties["write_iops"] = 800000
                    disk_properties["write_bandwidth"] = 2759452672
                    #below is google, above is our measured
                    #disk_properties["write_bandwidth"] = 3120 * mbs
                elif nr_disks == 24:
                    disk_properties["read_iops"] = 2400000
                    disk_properties["read_bandwidth"] = 5921532416
                    #below is google, above is our measured
                    #disk_properties["read_bandwidth"] = 9360 * mbs
                    disk_properties["write_iops"] = 1200000
                    disk_properties["write_bandwidth"] = 4663037952
                    #below is google, above is our measured
                    #disk_properties["write_bandwidth"] = 4680 * mbs
                _write_io_properties(disk_properties)
            else:
                logging.error('This is not a recommended Google Cloud instance setup for auto tuning, running manual iotune.')
                run_iotune()
        else:
            # Neither AWS (--ami) nor GCE: measure the disks with iotune.
            run_iotune()
