/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include <fstream>
#include <iomanip>
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/db_connector.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/engine/opt/pass.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/image/image_utils.h"
#else
#include "minddata/dataset/kernels/image/lite_image_utils.h"
#endif

namespace mindspore {
namespace dataset {
// Builder constructor: decode is off and no sampler is set; the worker count, rows per buffer
// and connector size defaults are read from the global ConfigManager.
AlbumOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
  const auto config = GlobalContext::config_manager();
  builder_op_connector_size_ = config->op_connector_size();
  builder_rows_per_buffer_ = config->rows_per_buffer();
  builder_num_workers_ = config->num_parallel_workers();
}

// Validates the builder settings, fills in defaults and constructs the AlbumOp.
// @param ptr - output: receives the newly created op on success
// @return Status - error if the sanity check fails, the schema file is missing, or the schema fails to load
Status AlbumOp::Builder::Build(std::shared_ptr<AlbumOp> *ptr) {
  RETURN_IF_NOT_OK(SanityCheck());
  if (builder_sampler_ == nullptr) {
    const int64_t num_samples = 0;  // default num samples of 0 means to sample entire set of data
    const int64_t start_index = 0;
    builder_sampler_ = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  }

  builder_schema_ = std::make_unique<DataSchema>();
  Path schema_file(builder_schema_file_);
  if (builder_schema_file_ == "" || !schema_file.Exists()) {
    RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + builder_schema_file_);
  }
  MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << ".";
  // Propagate schema loading failures instead of silently building an op on top of a broken schema.
  RETURN_IF_NOT_OK(builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_));

  *ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
                                   builder_op_connector_size_, builder_decode_, builder_extensions_,
                                   std::move(builder_schema_), std::move(builder_sampler_));
  return Status::OK();
}

// Checks the builder parameters that can be validated up front. Every problem found is appended
// to a single message so the caller sees all of them at once.
// @return Status - OK when all checks pass, otherwise kUnexpectedError carrying the collected messages
Status AlbumOp::Builder::SanityCheck() {
  std::string problems;

  Path album_dir(builder_dir_);
  if (!album_dir.IsDirectory()) {
    problems += "Invalid parameter, Album path is invalid or not set, path: " + builder_dir_ + ".\n";
  }
  if (builder_num_workers_ <= 0) {
    problems += "Invalid parameter, num_parallel_workers must be greater than 0, but got " +
                std::to_string(builder_num_workers_) + ".\n";
  }

  if (problems.empty()) {
    return Status::OK();
  }
  return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, problems);
}

// Constructs the op and registers every schema column in the base-class column name map.
// @param num_wkrs - number of worker threads; also the number of IO block queues
// @param rows_per_buffer - number of row keys packed into one IOBlock
// @param file_dir - album directory; taken by value and moved into folder_path_
// @param queue_size - size passed to the parallel op connector and to each IO block queue
// @param do_decode - whether image columns are decoded after loading
// @param exts - accepted file extensions; an empty set accepts every file
// @param data_schema - schema describing the columns to produce (ownership is taken)
// @param sampler - sampler that provides the row ids
AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
                 const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
                 std::shared_ptr<SamplerRT> sampler)
    : ParallelOp(num_wkrs, queue_size, std::move(sampler)),
      rows_per_buffer_(rows_per_buffer),
      folder_path_(std::move(file_dir)),  // file_dir is a by-value sink parameter: move it, do not copy again
      decode_(do_decode),
      extensions_(exts),
      data_schema_(std::move(data_schema)),
      row_cnt_(0),
      buf_cnt_(0),
      sampler_ind_(0),
      dirname_offset_(0) {
  // Set the column name map (base class field)
  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
    column_name_id_map_[data_schema_->column(i).name()] = i;
  }
  io_block_queues_.Init(num_workers_, queue_size);
}

// Extracts the number at the start of an album entry name.
// Character 0 is skipped (entry names are expected to begin with the path separator) and parsing
// stops at the first character that is not part of the number, normally the "." before the extension.
// @param name - entry name such as "/12.json"
// @param value - output: the parsed number; only written when parsing succeeds
// @return true if a number was parsed; false if the name is too short, not numeric or outside the int64 range
static bool AlbumEntryNumber(const std::string &name, int64_t *value) {
  if (name.size() < 2) {
    return false;
  }
  try {
    *value = std::stoll(name.substr(1));
    return true;
  } catch (const std::exception &) {
    // std::invalid_argument (no digits) or std::out_of_range (value too large)
    return false;
  }
}

// Helper function for string comparison.
// Album sorts the files via numerical values, so this is not a simple string comparison.
// Ordering rules (together they form a strict weak ordering, as required by std::sort):
//   - two numeric names are ordered by their numeric value
//   - a numeric name is ordered before a non-numeric name
//   - two non-numeric names are ordered lexicographically
// @param a - first entry name
// @param b - second entry name
// @return true if "a" must be ordered before "b"
bool StrComp(const std::string &a, const std::string &b) {
  int64_t value_a = 0;
  int64_t value_b = 0;
  const bool a_is_numeric = AlbumEntryNumber(a, &value_a);
  const bool b_is_numeric = AlbumEntryNumber(b, &value_b);
  if (a_is_numeric && b_is_numeric) {
    return value_a < value_b;
  }
  if (a_is_numeric != b_is_numeric) {
    return a_is_numeric;
  }
  return a < b;
}

// Scans the album directory on the calling thread, records every entry whose extension is accepted
// (all entries when no extension filter is set), orders the entries with StrComp and stores the
// resulting count in num_rows_.
// @return Status - error if the folder cannot be opened or no matching entry was found
Status AlbumOp::PrescanEntry() {
  Path album_dir(folder_path_);
  dirname_offset_ = folder_path_.length();
  std::shared_ptr<Path::DirIterator> dir_iter = Path::DirIterator::OpenDirectory(&album_dir);
  if (!album_dir.Exists() || dir_iter == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_);
  }
  MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";

  while (dir_iter->hasNext()) {
    Path entry = dir_iter->next();
    const bool accepted = extensions_.empty() || extensions_.find(entry.Extension()) != extensions_.end();
    if (!accepted) {
      MS_LOG(INFO) << "Album operator unsupported file found: " << entry.toString()
                   << ", extension: " << entry.Extension() << ".";
      continue;
    }
    // Store only the part of the path that follows the album directory prefix.
    image_rows_.push_back(entry.toString().substr(dirname_offset_));
  }

  std::sort(image_rows_.begin(), image_rows_.end(), StrComp);
  num_rows_ = image_rows_.size();
  if (num_rows_ != 0) {
    return Status::OK();
  }
  RETURN_STATUS_UNEXPECTED(
    "Invalid data, no valid data matching the dataset API AlbumDataset. Please check file path or dataset API.");
}

// Main logic of the master thread: scans the album folder, launches the worker threads, then for
// every epoch turns the sample ids produced by the sampler into IOBlocks and distributes them
// round-robin over the per-worker IO block queues.
// @return Status - OK after the last epoch has been dispatched, otherwise the first error encountered
Status AlbumOp::operator()() {
  RETURN_IF_NOT_OK(this->PrescanEntry());
  RETURN_IF_NOT_OK(LaunchThreadsAndInitOp());
  std::unique_ptr<DataBuffer> sampler_buffer;
  RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
  while (true) {  // each iteration of this loop is 1 epoch
    std::vector<int64_t> keys;
    keys.reserve(rows_per_buffer_);
    // Drain the sampler until it signals end-of-epoch.
    while (sampler_buffer->eoe() == false) {
      TensorRow sample_row;
      RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row));
      TensorPtr sample_ids = sample_row[0];
      for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) {
        if ((*itr) >= num_rows_) continue;  // index out of bound, skipping
        keys.push_back(*itr);
        row_cnt_++;
        // Ship a block each time the running row count reaches a multiple of rows_per_buffer_.
        if (row_cnt_ % rows_per_buffer_ == 0) {
          RETURN_IF_NOT_OK(
            io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone)));
          keys.clear();
        }
      }
      RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
    }
    // Flush the remaining keys that did not fill a whole block.
    if (keys.empty() == false) {
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone)));
    }
    if (IsLastIteration()) {
      // Last epoch: send EOE, then EOF, then one empty-key block per worker
      // (WorkerEntry treats an empty key list as its quit signal).
      std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe);
      std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof);
      RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block)));
      RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block)));
      for (int32_t i = 0; i < num_workers_; ++i) {
        RETURN_IF_NOT_OK(
          io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone)));
      }
      return Status::OK();
    } else {  // not the last repeat.
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe)));
    }

    if (epoch_sync_flag_) {
      // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for
      // the current epoch.
      RETURN_IF_NOT_OK(WaitForWorkers());
    }
    // If not the last repeat, self-reset and go to loop again.
    if (!IsLastIteration()) {
      RETURN_IF_NOT_OK(Reset());
      // Fetch the first sample buffer of the next epoch.
      RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
    }
    UpdateRepeatAndEpochCounter();
  }
}

// Worker loop: repeatedly takes one IOBlock from this worker's queue and reacts to it.
//   - wait block: counts this worker as paused; the last worker to pause sets wait_for_workers_post_
//   - EOE / EOF block: forwards the matching flag buffer to out_connector_
//   - data block: loads the listed rows into one DataBuffer and forwards it (1 IOBlock -> 1 DataBuffer)
//   - data block without keys: quit signal, the worker returns OK
// @param worker_id - id of this worker; selects the queue to read and the output slot to write
// @return Status - OK on a clean shutdown, an error if any step fails or a null block is received
Status AlbumOp::WorkerEntry(int32_t worker_id) {
  TaskManager::FindMe()->Post();
  int64_t next_buffer_id = worker_id;
  std::unique_ptr<IOBlock> block;
  RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&block));
  while (block != nullptr) {
    if (block->wait()) {
      // Sync block: the master thread wants the workers to pause and sync up.
      // Whoever reaches this point last wakes the master thread.
      if (++num_workers_paused_ == num_workers_) {
        wait_for_workers_post_.Set();
      }
    } else if (block->eoe()) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)));
      // Buffer ids start over after the end of an epoch.
      next_buffer_id = worker_id;
    } else if (block->eof()) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
    } else {
      std::vector<int64_t> row_keys;
      RETURN_IF_NOT_OK(block->GetKeys(&row_keys));
      if (row_keys.empty()) {
        return Status::OK();  // empty key is a quit signal for workers
      }
      auto buffer = std::make_unique<DataBuffer>(next_buffer_id, DataBuffer::kDeBFlagNone);
      RETURN_IF_NOT_OK(LoadBuffer(row_keys, &buffer));
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(buffer)));
      next_buffer_id += num_workers_;
    }
    RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&block));
  }
  RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker.");
}

// Reads the first three bytes of a file and checks them against the JPEG signature (ff d8 ff).
// Only the JPEG signature is recognised here; any other content leaves *valid false.
// This function does not return Status because bad input should simply be skipped, not fail the pipeline.
// @param file_name - path of the file to inspect
// @param valid - output: true only when the file starts with the JPEG signature
// @return true if the three bytes could be read, false if the file could not be opened or read
bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
  constexpr int kSignatureLen = 3;
  *valid = false;

  std::ifstream input(file_name, std::ios::binary | std::ios::in);
  if (!input.is_open()) {
    return false;
  }

  unsigned char signature[kSignatureLen];
  (void)input.read(reinterpret_cast<char *>(signature), kSignatureLen);
  const bool read_failed = input.fail();
  input.close();
  if (read_failed) {
    return false;
  }

  // Normal JPEGs start with \xff\xd8\xff\xe0, JPEGs with EXIF start with \xff\xd8\xff\xe1;
  // matching on \xff\xd8\xff covers both.
  *valid = signature[0] == 0xff && signature[1] == 0xd8 && signature[2] == 0xff;
  return true;
}

// Loads the file referenced by an image column and appends exactly one tensor to the row.
//   - file cannot be opened, or has a .png/.PNG extension: an empty tensor is appended
//   - file has a .bin/.BIN extension: the raw file content is appended
//   - otherwise the file must carry a JPEG signature; its content is appended, decoded first when
//     decode_ is set. A failed signature check or a failed decode appends an empty tensor instead.
// @param image_file_path - path of the file to load
// @param col_num - schema column index, used to type the empty placeholder tensor
// @param row - row that receives the tensor
// @return Status - error only if tensor creation fails
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) {
  {
    // Probe that the file can be opened; a missing file is not an error, it yields an empty tensor.
    std::ifstream probe(image_file_path, std::ios::binary | std::ios::in);
    if (probe.fail()) {
      MS_LOG(WARNING) << "File not found:" << image_file_path << ".";
      return LoadEmptyTensor(col_num, row);
    }
  }

  Path file(image_file_path);
  const std::string ext = file.Extension();

  // Hack logic: png images are replaced with an empty tensor.
  if (ext == ".png" || ext == ".PNG") {
    MS_LOG(INFO) << "PNG!" << image_file_path << ".";
    return LoadEmptyTensor(col_num, row);
  }

  TensorPtr image;
  // Bin files are passed through as raw bytes.
  if (ext == ".bin" || ext == ".BIN") {
    MS_LOG(INFO) << "Bin file found" << image_file_path << ".";
    RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
    row->push_back(std::move(image));
    return Status::OK();
  }

  // Anything else must look like a JPEG before it is loaded.
  bool is_jpeg = false;
  if (!CheckImageType(image_file_path, &is_jpeg) || !is_jpeg) {
    return LoadEmptyTensor(col_num, row);
  }

  RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
  if (decode_) {
    Status rc = Decode(image, &image);
    if (rc.IsError()) {
      return LoadEmptyTensor(col_num, row);
    }
  }
  row->push_back(std::move(image));
  return Status::OK();
}

// Converts a json value into a vector of strings and appends it to the row as one tensor.
// @param json_obj - json value to convert
// @param col_num - schema column index (not used by this loader)
// @param row - row that receives the tensor
// @return Status - error if tensor creation fails
Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  std::vector<std::string> strings = json_obj.get<std::vector<std::string>>();
  MS_LOG(INFO) << "String array label found: " << strings << ".";

  TensorPtr tensor;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, &tensor));
  row->push_back(std::move(tensor));
  return Status::OK();
}

// Converts a json value into a string and appends it to the row as a scalar string tensor.
// @param json_obj - json value to convert
// @param col_num - schema column index (not used by this loader)
// @param row - row that receives the tensor
// @return Status - error if tensor creation fails
Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  std::string text = json_obj.get<std::string>();
  MS_LOG(INFO) << "String label found: " << text << ".";

  TensorPtr tensor;
  RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(text, &tensor));
  row->push_back(std::move(tensor));
  return Status::OK();
}

// Converts the elements of a json value into integers of the width declared by the schema column
// (int64 or int32) and appends them to the row as one tensor.
// @param json_obj - json value whose elements are converted
// @param col_num - schema column index; its type selects the element width
// @param row - row that receives the tensor
// @return Status - error if the column type is neither int32 nor int64, or tensor creation fails
Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  TensorPtr tensor;
  if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
    std::vector<int64_t> values;
    for (const auto &element : json_obj) {
      values.push_back(element.get<int64_t>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, &tensor));
  } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) {
    std::vector<int32_t> values;
    for (const auto &element : json_obj) {
      values.push_back(element.get<int32_t>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, &tensor));
  } else {
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " +
                             data_schema_->column(col_num).type().ToString());
  }
  row->push_back(std::move(tensor));
  return Status::OK();
}

// Converts the elements of a json value into floating point numbers of the width declared by the
// schema column (float64 or float32) and appends them to the row as one tensor.
// @param json_obj - json value whose elements are converted
// @param col_num - schema column index; its type selects the element width
// @param row - row that receives the tensor
// @return Status - error if the column type is neither float32 nor float64, or tensor creation fails
Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  TensorPtr tensor;
  if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
    std::vector<double> values;
    for (const auto &element : json_obj) {
      values.push_back(element.get<double>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, &tensor));
  } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) {
    std::vector<float> values;
    for (const auto &element : json_obj) {
      values.push_back(element.get<float>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, &tensor));
  } else {
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " +
                             data_schema_->column(col_num).type().ToString());
  }
  row->push_back(std::move(tensor));
  return Status::OK();
}

// Builds the tensor for the internal "id" column from the entry name and appends it to the row.
// For a string column the entry name itself is stored. For any other column type the number at the
// start of the entry name is stored as an int64 scalar.
// @param file - entry name relative to the album directory, e.g. "/12.json"
// @param col_num - schema column index of the id column
// @param row - row that receives the tensor
// @return Status - error if the entry name does not start with a number that fits int64, or tensor creation fails
Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row) {
  TensorPtr id;
  if (data_schema_->column(col_num).type() == DataType::DE_STRING) {
    RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id));
    row->push_back(std::move(id));
    return Status::OK();
  }

  // Character 0 is skipped (entry names are expected to begin with the path separator). Parsing stops
  // at the first non-digit, normally the "." before the extension. std::stoll is used so the whole
  // int64 range of the tensor is available, and a parse failure is reported as a Status instead of
  // escaping as an exception.
  int64_t image_id = 0;
  try {
    image_id = std::stoll(file.substr(1));
  } catch (const std::exception &) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse a numeric id from file name: " + file);
  }

  RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, &id));
  MS_LOG(INFO) << "File ID " << image_id << ".";
  row->push_back(std::move(id));
  return Status::OK();
}

// Appends a placeholder tensor of shape {0}, typed after the given schema column, to the row.
// @param col_num - schema column index that provides the tensor type
// @param row - row that receives the placeholder
// @return Status - error if tensor creation fails
Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) {
  const TensorShape empty_shape({0});
  TensorPtr placeholder;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(empty_shape, data_schema_->column(col_num).type(), &placeholder));
  row->push_back(std::move(placeholder));
  return Status::OK();
}

// Loads a scalar floating point tensor from a json value and appends it to the row.
// The json value carries no width information, so the schema column type decides whether the
// value is stored as float64 or float32.
// @param json_obj - json value to convert
// @param col_num - schema column index; its type selects float32 or float64
// @param row - row that receives the tensor
// @return Status - error if the column type is neither float32 nor float64, or tensor creation fails
Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  TensorPtr float_tensor;
  if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
    double data = json_obj;
    MS_LOG(INFO) << "double found: " << json_obj << ".";
    RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, &float_tensor));
  } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) {
    float data = json_obj;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, &float_tensor));
    MS_LOG(INFO) << "float found: " << json_obj << ".";
  } else {
    // Same handling as LoadFloatArrayTensor: never push a null tensor into the row.
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " +
                             data_schema_->column(col_num).type().ToString());
  }
  row->push_back(std::move(float_tensor));
  return Status::OK();
}

// Loads a scalar integer tensor from a json value and appends it to the row.
// The value is converted to the width declared by the schema column (int64 or int32).
// @param json_obj - json value to convert
// @param col_num - schema column index; its type selects int32 or int64
// @param row - row that receives the tensor
// @return Status - error if the column type is neither int32 nor int64, or tensor creation fails
Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
  TensorPtr int_tensor;
  if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
    int64_t data = json_obj;
    MS_LOG(INFO) << "int64 found: " << json_obj << ".";
    RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, &int_tensor));
  } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) {
    int32_t data = json_obj;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, &int_tensor));
    MS_LOG(INFO) << "int32 found: " << json_obj << ".";
  } else {
    // Same handling as LoadIntArrayTensor: never push a null tensor into the row.
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " +
                             data_schema_->column(col_num).type().ToString());
  }
  row->push_back(std::move(int_tensor));
  return Status::OK();
}

// Builds one TensorRow from one album entry. The entry is read line by line; each line is parsed
// as a json object and, for every schema column, one loader is chosen from the column type and the
// kind of json value found:
//   - column named "id": taken from the entry name, not from the json
//   - key missing in the json: empty placeholder tensor
//   - string value in a string column: scalar string; array value in a string column: string array
//   - string value in any other column: treated as the path of an image file
//   - float / int column: scalar or array loader depending on whether the value is an array
//   - anything else: a warning is logged and nothing is appended for that column
// @param row_id - id assigned to the produced row
// @param file - entry name relative to the album directory
// @param row - output row; it is reset before loading
// @return Status - error if the file cannot be opened, a line cannot be parsed, or a loader fails
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, TensorRow *row) {
  (*row) = TensorRow(row_id, {});
  MS_LOG(INFO) << "Image row file: " << file << ".";

  const std::string json_path = folder_path_ + file;
  std::ifstream json_stream(json_path);
  if (!json_stream.is_open()) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + json_path);
  }

  std::string line;
  while (getline(json_stream, line)) {
    try {
      nlohmann::json js = nlohmann::json::parse(line);
      MS_LOG(INFO) << "This Line: " << line << ".";

      // Every column descriptor of the schema is looked up in the parsed json object.
      const int32_t num_columns = data_schema_->NumColumns();
      for (int32_t i = 0; i < num_columns; i++) {
        const auto &col = data_schema_->column(i);

        // "id" is internal and comes from the entry name.
        if (col.name() == "id") {
          RETURN_IF_NOT_OK(LoadIDTensor(file, i, row));
          continue;
        }
        // Key not present: keep the column position with an empty placeholder.
        if (js.find(col.name()) == js.end()) {
          MS_LOG(INFO) << "Pushing empty tensor for column: " << col.name() << ".";
          RETURN_IF_NOT_OK(LoadEmptyTensor(i, row));
          continue;
        }

        nlohmann::json column_value = js.at(col.name());
        MS_LOG(INFO) << "This column is: " << col.name() << ".";

        const bool value_is_array = column_value.is_array();
        const bool value_is_string = column_value.is_string();
        const bool string_column = col.type() == DataType::DE_STRING;
        const bool float_column = col.type() == DataType::DE_FLOAT32 || col.type() == DataType::DE_FLOAT64;
        const bool int_column = col.type() == DataType::DE_INT64 || col.type() == DataType::DE_INT32;

        if (value_is_string && string_column) {
          RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, row));
        } else if (value_is_array && string_column) {
          RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, row));
        } else if (value_is_string) {
          // A string value in a non-string column is the path of an image file.
          std::string image_file_path = column_value;
          RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, row));
        } else if (float_column) {
          if (value_is_array) {
            RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, row));
          } else {
            RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, row));
          }
        } else if (int_column) {
          if (value_is_array) {
            RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, row));
          } else {
            RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, row));
          }
        } else {
          MS_LOG(WARNING) << "Value type for column: " << col.name() << " is not supported.";
        }
      }
    } catch (const std::exception &) {
      json_stream.close();
      RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + json_path);
    }
  }
  json_stream.close();
  return Status::OK();
}

// Fills one DataBuffer: loads one TensorRow per key and installs the resulting table in the buffer.
// @param keys - row indices into image_rows_
// @param db - buffer that receives the loaded rows
// @return Status - the first error reported by LoadTensorRow, otherwise OK
Status AlbumOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) {
  auto table = std::make_unique<TensorQTable>();
  for (const int64_t key : keys) {
    TensorRow loaded_row;
    RETURN_IF_NOT_OK(LoadTensorRow(key, image_rows_[key], &loaded_row));
    table->push_back(std::move(loaded_row));
  }
  (*db)->set_tensor_table(std::move(table));
  return Status::OK();
}

// Writes a description of this op to the stream.
// The id/name header and the ParallelOp information are always printed. The detailed form
// additionally lists the row count, the album directory and the decode setting.
// @param out - stream to write to
// @param show_all - true for the detailed form, false for the one-line summary
void AlbumOp::Print(std::ostream &out, bool show_all) const {
  out << "(" << std::setw(2) << operator_id_ << ") <AlbumOp>:";
  // Common info from the super class, in summary or detailed form.
  ParallelOp::Print(out, show_all);
  if (show_all) {
    out << "\nNumber of rows:" << num_rows_ << "\nAlbum directory: " << folder_path_
        << "\nDecode: " << (decode_ ? "yes" : "no") << "\n\n";
    return;
  }
  out << "\n";
}

// Self-reset between epochs: resets the sampler and, if that succeeds, clears the row counter.
// @return Status - the error reported by the sampler reset, otherwise OK
Status AlbumOp::Reset() {
  MS_LOG(DEBUG) << Name() << " performing a self-reset.";
  Status sampler_rc = sampler_->ResetSampler();
  if (sampler_rc.IsError()) {
    return sampler_rc;
  }
  row_cnt_ = 0;
  return Status::OK();
}

// Handshake with the sampler: hands this op to the sampler so it can query it (e.g. for the row count).
// @return Status - result of the handshake
Status AlbumOp::InitSampler() {
  return sampler_->HandshakeRandomAccessOp(this);
}

// Registers the queues and the wait post with the execution tree, launches the worker threads
// and finally performs the handshake with the sampler.
// @return Status - error if the execution tree is not set or any of the steps fails
Status AlbumOp::LaunchThreadsAndInitOp() {
  if (tree_ == nullptr) {
    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set.");
  }

  // Register the queue list and the wait post for interrupt services.
  RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
  RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));

  // Launch the workers; each one runs WorkerEntry with its own worker id.
  auto worker_fn = std::bind(&AlbumOp::WorkerEntry, this, std::placeholders::_1);
  RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, worker_fn));
  TaskManager::FindMe()->Post();

  // Pass numRows to the sampler.
  RETURN_IF_NOT_OK(InitSampler());
  return Status::OK();
}

// Visitor accept method for NodePass: hands this op, downcast to AlbumOp, to the pass.
// @param p - the pass to run
// @param modified - output flag filled in by the pass
// @return Status - result of running the pass on this node
Status AlbumOp::Accept(NodePass *p, bool *modified) {
  auto self = shared_from_base<AlbumOp>();
  return p->RunOnNode(std::move(self), modified);
}

// Fills the base-class column name map from the schema, unless the map already has entries,
// in which case only a warning is logged.
// @return Status - always OK
Status AlbumOp::ComputeColMap() {
  if (!column_name_id_map_.empty()) {
    MS_LOG(WARNING) << "Column name map is already set!";
    return Status::OK();
  }
  const int32_t num_columns = data_schema_->NumColumns();
  for (int32_t col = 0; col < num_columns; ++col) {
    column_name_id_map_[data_schema_->column(col).name()] = col;
  }
  return Status::OK();
}
}  // namespace dataset
}  // namespace mindspore
