package com.alibaba.schedulerx.worker.timer;

import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import com.alibaba.schedulerx.protocol.Worker.ContainerCheckZombieRequest;
import com.alibaba.schedulerx.protocol.Worker.ContainerCheckZombieResponse;
import com.alibaba.schedulerx.protocol.Worker.ContainerReportTaskStatusRequest;
import com.alibaba.schedulerx.protocol.utils.FutureUtils;
import com.alibaba.schedulerx.worker.SchedulerxWorker;
import com.alibaba.schedulerx.worker.batch.ContainerStatusReqHandler;
import com.alibaba.schedulerx.worker.batch.ContainerStatusReqHandlerPool;
import com.alibaba.schedulerx.worker.container.ContainerFactory;
import com.alibaba.schedulerx.worker.container.ContainerPool;
import com.alibaba.schedulerx.worker.domain.WorkerConstants;

import akka.actor.ActorSelection;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;

/**
 * check local zombie container according task master response
 * @author yanxun on 2019/1/16.
 */
public class ZombieContainerCheckTimer extends AbstractTimerTask{
    private ContainerStatusReqHandlerPool statusReqBatchHandlerPool = ContainerStatusReqHandlerPool.INSTANCE;
    private ContainerPool containerPool = ContainerFactory.getContainerPool();
    
    @Override
    public String getName() {
        return "ZombieContainerCheckTimer";
    }
    
    @Override
    public long getInitialDelay() {
        // 5min
        return 60 * 5;
    }
    
    @Override
    public long getPeriod() {
        // 10 min
        return 60 * 10;
    }
    
    @Override
    public void run() {
        Map<String, List<Long>> masterPath2JobInstanceIds = Maps.newHashMap();
        Long jobInstanceId;
        String masterPath;
        for (Entry<Long, ContainerStatusReqHandler<ContainerReportTaskStatusRequest>> entry : statusReqBatchHandlerPool.getHandlers().entrySet()) {
            jobInstanceId = entry.getKey();
            masterPath = entry.getValue().getTaskMasterAkkaPath();
            if (!masterPath2JobInstanceIds.containsKey(masterPath)) {
                masterPath2JobInstanceIds.put(masterPath, Lists.newArrayList(jobInstanceId));
            } else {
                masterPath2JobInstanceIds.get(masterPath).add(jobInstanceId);
            }
        }
        for (Entry<String, List<Long>> entry : masterPath2JobInstanceIds.entrySet()) {
            String masterCheckPath = entry.getKey().replace(WorkerConstants.WORKER_AKKA_TASK_ROUTING_PATH, WorkerConstants.WORKER_AKKA_HEARTBEAT_ROUTING_PATH);
            List<Long> jobInstanceIds = entry.getValue();
            ContainerCheckZombieRequest request = ContainerCheckZombieRequest
                .newBuilder()
                .addAllJobInstanceId(jobInstanceIds)
                .build();
            ActorSelection selection = SchedulerxWorker.actorSystem.actorSelection(masterCheckPath);
            try {
                ContainerCheckZombieResponse response = (ContainerCheckZombieResponse)FutureUtils.awaitResult(selection,
                    request, 10);
                List<Long> zombieJobInstanceIds = response.getZombieJobInstanceIdList();
                if (!CollectionUtils.isEmpty(zombieJobInstanceIds)) {
                    LOGGER.warn("detect zombieJobInstanceIds:{}, clean...", StringUtils.join(zombieJobInstanceIds, ","));
                    for (Long zombieJobInstanceId : zombieJobInstanceIds) {
                        if (zombieJobInstanceId != 0) {
                            statusReqBatchHandlerPool.stop(zombieJobInstanceId);
                            containerPool.destroyByInstance(zombieJobInstanceId);
                        }
                    }
                }
            } catch (Throwable e) {
                LOGGER.error("ZombieContainerCheckTimer check error", e);
            }
        }
    }
}
