/*
 * linux/drivers/misc/exchnd/exception_queue.c
 *
 * Copyright (C) 2016 Advanced Driver Information Technology GmbH
 * Written by Matthias Weise (mweise@de.adit-jv.com)
 *            Frederic Berat (fberat@de.adit-jv.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 */

/*
 * Queue handling exceptions sequentially
 *
 * This file contains the implementation of an exception queue that handles
 * incoming exceptions sequentially. It has a defined size of exceptions that
 * it can queue. The exceptions are handled in a separate thread which is also
 * defined here.
 */
#define pr_fmt(fmt) "exchnd exception queue: " fmt

#include <linux/exchnd.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include "internal.h"

/*
 * Human-readable module names, initialised from EXCHND_MODULE_STRINGS.
 * Indexed with enum exchnd_modules values when building log messages.
 */
const char *exchnd_mod_names[] = EXCHND_MODULE_STRINGS;

/*
 * Global state of the exception queue: the slots used for queued
 * (non-fatal) exceptions, the single slot reserved for a fatal exception,
 * and the bookkeeping shared between the code that queues exceptions and
 * the worker thread that processes them.
 */
static struct {
	/* Array of queued exceptions */
	struct exception_info queue[CONFIG_EXCHND_QUEUE_SIZE];
	/* Storage for the one possible fatal exception */
	struct exception_info fatal;
	/*
	 * Free-running indexes; users mask them with
	 * (CONFIG_EXCHND_QUEUE_SIZE - 1) to select a slot in queue[].
	 * NOTE(review): the masking presumably relies on the queue size being
	 * a power of two - confirm against the Kconfig definition.
	 */
	unsigned long rd_idx;	/* Exception queue item in work */
	unsigned long wr_idx;	/* Exception queue next free item */
	/* Wait queue the worker sleeps on until the daemon has finished */
	wait_queue_head_t daemon_wait_queue;
	/* Wait queue the worker sleeps on until an exception is ready */
	wait_queue_head_t rd_wait_queue;
	/*
	 * Protects slot allocation (wr_idx/rd_idx on the producer side) and
	 * the modules/mod_type fields of the queued exceptions.
	 */
	spinlock_t write_lock;
	/* Worker thread that sequentially handles the exceptions */
	struct task_struct *worker_thread;
	/* Condition of daemon_wait_queue: set when the daemon has finished */
	bool dfinished;
	/* Set to 1 once a fatal exception was accepted; later ones halt */
	atomic_t reject_excpts;
	/* Set to 1 by exchnd_test_and_set_restart() */
	atomic_t restart_ehm;
} eq = { .write_lock = __SPIN_LOCK_UNLOCKED(eq.write_lock),
	.reject_excpts = ATOMIC_INIT(0),
	.restart_ehm   = ATOMIC_INIT(0) };

/**
 * exchnd_test_and_set_restart - Atomically claim the restart flag
 *
 * Atomically switches eq.restart_ehm from 0 to 1 and reports the value the
 * flag had before the call. This closes the race between the restart
 * notification and exchnd_sys_restart_execute: a restart initiated by the
 * driver must not be handled again, and a restart must not be executed if
 * one is already ongoing.
 *
 * Return: the previous flag value - 0 if this caller claimed the restart,
 * non-zero if the flag was already set.
 */
int exchnd_test_and_set_restart(void)
{
	int previous;

	previous = atomic_cmpxchg(&eq.restart_ehm, 0, 1);

	return previous;
}

/**
 * get_free_exc_slot - Get free slot from exception queue safely
 *
 * Returns the slot at the current write index and advances the write index.
 * If the queue is already full, the read index is moved up to the write
 * index, which discards every entry that was still pending, and the number
 * of discarded entries is logged.
 *
 * The slot is marked as not ready. If it still owns a dynamically allocated
 * module list (EXCHND_MOD_DYN), that list is detached under the lock and
 * freed after the lock has been released.
 *
 * The selection of the slot is protected by eq.write_lock.
 *
 * Return: Free exception info in the exception queue (never NULL)
 */
static struct exception_info *get_free_exc_slot(void)
{
	unsigned long flags;
	unsigned long pending;
	struct exception_info *free_info;
	enum exchnd_modules (*to_free)[EHM_LAST_ELEMENT] = NULL;

	spin_lock_irqsave(&eq.write_lock, flags);
	free_info = &eq.queue[eq.wr_idx & (CONFIG_EXCHND_QUEUE_SIZE - 1)];
	pending = eq.wr_idx - eq.rd_idx;
	if (pending >= CONFIG_EXCHND_QUEUE_SIZE) {
		/*
		 * Moving the read index to the write index skips all entries
		 * that were still pending, so that is the number reported.
		 */
		pr_err("Exception queue overrun: Lost %lu entries !\n",
		       pending);
		/* Move read index to latest entry */
		eq.rd_idx = eq.wr_idx;
	}
	eq.wr_idx++;
	/* Mark the structure not ready for processing. */
	free_info->ready = 0;
	/* If we overrun the queue, we need to ensure to release memory */
	if (free_info->mod_type == EXCHND_MOD_DYN) {
		to_free = free_info->modules;
		free_info->modules = NULL;
	}
	spin_unlock_irqrestore(&eq.write_lock, flags);

	/* kfree(NULL) is a no-op, so no guard is needed */
	kfree(to_free);

	return free_info;
}

/**
 * eq_get_info - Check kind of exception and return exception info store
 * @trigger: trigger that was called by the exception
 *
 * Entry gate for incoming exceptions. It checks whether exceptions are still
 * accepted, blocks any further exception once a fatal one has been accepted,
 * and hands out the storage for the exception information:
 *
 *  - fatal trigger: the dedicated eq.fatal slot, written through em_write;
 *  - any other trigger: a slot of the exception queue, written through
 *    rb_write.
 *
 * A caller arriving after a fatal exception was accepted (or losing the race
 * to be the first fatal exception) never returns from this function: it is
 * parked in an endless uninterruptible sleep. Further exceptions must not
 * halt the system before handling of the first fatal exception is finished,
 * which also means that the system should be restarted once that handling
 * is done.
 *
 * The restart can happen either by running the EHM_SYS_RESTART module as the
 * last module, or by letting the system restart by itself through a kernel
 * configuration. The EHM_RESTART module will eventually call kernel_restart
 * and/or emergency_restart, which implies that no other processing may be
 * done afterwards. To have EHM_SYS_RESTART called, the user should add it to
 * the fatal triggers or select a configuration that has it by default.
 * The triggers to really consider are EHT_KERNEL_OOPS and EHT_KERNEL_PANIC,
 * for which the system may not restart by itself. There is no need for this
 * module on the EHT_RESTART trigger, as the system will definitely restart,
 * and the user probably does not want it on EHT_POWER_OFF and EHT_HALT.
 *
 * Note that the EHT_RESTART trigger is disabled as soon as a fatal exception
 * is received, so that the restart itself can proceed and does not end up
 * in the halt loops described above.
 *
 * Return: A pointer to the storage of the exception information
 */
struct exception_info *eq_get_info(enum exchnd_triggers trigger)
{
	struct exception_info *info;

	/* A fatal exception was already accepted -> halt this exception */
	if (atomic_read(&eq.reject_excpts) == 1)
		goto halt;

	if (exchnd_trigger_list[trigger].fatality != FT_FATAL) {
		/* Non-fatal: queue it (always a valid address), user space */
		info = get_free_exc_slot();
		info->write_func = rb_write;
	} else if (atomic_cmpxchg(&eq.reject_excpts, 0, 1) == 0) {
		/* First fatal exception: handle it, write directly to EM */
		info = &eq.fatal;
		info->write_func = em_write;
	} else {
		/* Lost the race, another fatal exception was first -> halt */
		goto halt;
	}

	/* Reset the per-exception fields of the slot */
	info->trigger = trigger;
	info->task = current;
	info->sig = 0;
	info->siginfo = NULL;
	info->regs = NULL;
	/* Goes through the lock to avoid parallel modification on overrun */
	eq_add_modules(info,
		       exchnd_trigger_list[trigger].modules,
		       EXCHND_MOD_STAT);
	info->msg[0] = '\0';
	info->syscall_index = get_syscall_index();
	info->tswitch_index = get_tswitch_index();

	return info;

halt:
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}
}

/**
 * eq_add_modules - Safely attach a module list to an exception_info
 * @info: The element to attach the module list to
 * @modules: The module list to attach
 * @mode: The module allocation mode: EXCHND_MOD_STAT or EXCHND_MOD_DYN
 *
 * The exception queue may overrun and module lists can be created
 * dynamically, so the list pointer and its allocation mode are updated
 * together under eq.write_lock to keep them from being modified
 * concurrently.
 */
void eq_add_modules(struct exception_info *info,
		    enum exchnd_modules (*modules)[EHM_LAST_ELEMENT],
		    int mode)
{
	unsigned long irq_state;

	spin_lock_irqsave(&eq.write_lock, irq_state);
	info->modules = modules;
	info->mod_type = mode;
	spin_unlock_irqrestore(&eq.write_lock, irq_state);
}

/*
 * exchnd_filter_module - Check if the module has to be filtered out
 * @info: exception the module would be executed for
 * @module: module about to be executed
 *
 * Some modules have no meaning to be executed, or may lead to issues
 * depending on system state. They are filtered out here with tests that are
 * specific to the module:
 *  - any module whose bit is set in disabled_modules;
 *  - EHM_SYS_RESTART if a restart is already ongoing or the trigger is
 *    fatal (the restart flag is claimed as a side effect of the check);
 *  - EHM_FS_STATE, EHM_MEMORY_MAP, EHM_CGROUPS on fatal triggers;
 *  - the backtrace/stack/memory dump modules for user tasks during OOM and
 *    for kernel threads on EHT_EXTERNAL / EHT_ON_DEMAND triggers.
 *
 * Return: 0 if the module can be executed, non-zero if it must be skipped.
 */
static int exchnd_filter_module(struct exception_info *info,
				enum exchnd_modules module)
{
	int ret = 0;
	struct exchnd_trigger *tg = &exchnd_trigger_list[info->trigger];

	if (disabled_modules & (1 << module)) {
		pr_warn("%s is not enabled.\n", exchnd_mod_names[module]);
		return 1;
	}

	/* Specific tests based on module */
	switch (module) {
	case EHM_SYS_RESTART:
		/* Non-zero means somebody else already claimed the restart */
		ret = exchnd_test_and_set_restart();
		if (ret) {
			pr_err("Restart is already ongoing, skipping.\n");
		} else {
			if (tg->fatality == FT_FATAL) {
				ret = 1;
				pr_err("Skipping restart on fatal exception.\n");
			}
		}
		break;
	case EHM_FS_STATE:
	case EHM_MEMORY_MAP:
	case EHM_CGROUPS:
		/* These module are not relevant in these cases */
		if (tg->fatality == FT_FATAL) {
			ret = 1;
			pr_err("Ignoring %s during fatal exception.\n",
			       exchnd_mod_names[module]);
		}
		break;
	case EHM_BACKTRACE_ALL_THREADS:
	case EHM_BACKTRACE:
	case EHM_STACK_DUMP:
	case EHM_MEMORY_DUMP:
		/* Tasks with an mm are skipped while OOM handling is ongoing */
		if (exchnd_oom_ongoing() && info->task && info->task->mm) {
			ret = 1;
			pr_err("Ignoring %s during OOM.\n",
			       exchnd_mod_names[module]);
		}
		/* Tasks without an mm are kernel threads */
		if (((info->trigger == EHT_EXTERNAL) ||
		     (info->trigger == EHT_ON_DEMAND)) &&
		    info->task && !info->task->mm) {
			ret = 1;
			pr_err("%s disabled on Kthreads for external trigger.\n",
			       exchnd_mod_names[module]);
		}
		break;
	default:
		/* Nothing to do */
		break;
	}

	return ret;
}

/**
 * call_modules - Call all configured modules of a trigger
 * @info: structure containing the exception information
 * @start: index in the module list where to start calling the modules
 * @trigger: trigger whose configured modules are called (not used by the
 *           body; the list is taken from @info->modules)
 * @separator: stop as soon as a module >= this value is reached
 *
 * Helper that walks the module list attached to @info, beginning at @start.
 * The walk ends at the first EHM_NONE entry, at the first module that is not
 * "smaller" than @separator, or at the end of the list, whichever comes
 * first. Modules rejected by exchnd_filter_module() or without an execute
 * callback are skipped. If a module returns an error, a failure message is
 * written through exc_write_string().
 *
 * Return: Index of the entry that ended the walk, or EHM_LAST_ELEMENT if
 * the whole list was consumed.
 */
static unsigned int call_modules(struct exception_info *info,
				 unsigned int start,
				 enum exchnd_triggers trigger,
				 enum exchnd_modules separator)
{
	unsigned int i;
	enum exchnd_modules (*modules)[EHM_LAST_ELEMENT] = info->modules;
	int (*execute)(struct exception_info *, enum exchnd_modules);

	/*
	 * The bound is checked before the entry is read, so a list without
	 * an EHM_NONE terminator is never read past its last element.
	 */
	for (i = start; i < EHM_LAST_ELEMENT; i++) {
		enum exchnd_modules mod = (*modules)[i];

		/* EHM_NONE marks the end of the configured modules */
		if ((mod == EHM_NONE) || (mod >= separator))
			break;

		if (exchnd_filter_module(info, mod))
			continue;

		/* Is module configured ? */
		execute = exchnd_module_list[mod].execute;

		if (execute) {
			unsigned char fail_buf[64] = { 0 };
			/* execute a module */
			int res = execute(info, mod);

			if (res == 0)
				continue;

			snprintf(fail_buf, sizeof(fail_buf),
				 "Executing module %s failed with error %d",
				 exchnd_mod_names[mod], res);
			exc_write_string(fail_buf, info);
		}
	}

	return i;
}

/**
 * handle_exc_locally - Handle fatal exception in same thread in kernel module
 *
 * This function handles exceptions in the context of the exception. It does
 * call the data collection and continues. This is normally done for fatal
 * exceptions because it can't be guaranteed that e.g. the scheduling does
 * still work.
 */
static void handle_exc_locally(void)
{
	/* Check if trigger is configured */
	if (!eq.fatal.modules)
		return;
	if ((*eq.fatal.modules)[0] == EHM_NONE)
		return;

	write_exc_header(&eq.fatal);
	call_modules(&eq.fatal, 0, eq.fatal.trigger, EHM_LAST_ELEMENT);
}

/**
 * handle_exception - Do all generic things to handle one exception
 * @index: index of exception in exception queue
 *
 * This function handles all common stuff for an exception. It calls the
 * modules and synchronizes with the daemon.
 */
static void handle_exception(unsigned long index)
{
	unsigned int last_mod;
	enum exchnd_triggers trigger = eq.queue[index].trigger;
	enum exchnd_modules (*modules)[EHM_LAST_ELEMENT] =
		eq.queue[index].modules;
	enum exchnd_modules (*to_free)[EHM_LAST_ELEMENT] = NULL;
	struct exchnd_message_header eod_header;
	char buf[] = "No system restart configured.\n";
	unsigned long flags;

	/* Check if trigger is configured */
	if (!modules || ((*modules)[0] == EHM_NONE))
		return;

	write_exc_header(&eq.queue[index]);
	last_mod = call_modules(&eq.queue[index], 0, trigger, EHM_ACTION_START);
	/* Send end of data exception message */
	eod_header.length                = 1;
	eod_header.type                  = EHM_ACTION_START;
	eod_header.trigger               = trigger;
	if (eq.queue[index].task)
		eod_header.pid           = eq.queue[index].task->pid;
	else
		eod_header.pid           = 0;
	eod_header.seq_num               = 0;
	eod_header.flags.collected       = 1;
	eod_header.flags.internal        = 1;
	eod_header.flags.addition_needed = 1;
	if ((last_mod >= EHM_LAST_ELEMENT) ||
	    ((*modules)[last_mod] < EHM_ACTION_START)) {
		/* There is not action configured for this trigger */
		eq.queue[index].write_func(&eod_header, "0");
	} else {
		unsigned long flags;
		bool is_daemon_active;

		/* Trigger has configured an action */
		eq.queue[index].write_func(&eod_header, "1");

		/* If no-one is there to handle this event, just continue */
		spin_lock_irqsave(&rb_get_read_wait_queue()->lock, flags);
		/* waitqueue_active checked under wq lock. */
		is_daemon_active = waitqueue_active(rb_get_read_wait_queue());
		spin_unlock_irqrestore(&rb_get_read_wait_queue()->lock, flags);

		if (is_daemon_active)
			/* Wait for daemon to finish */
			wait_event_interruptible(eq.daemon_wait_queue,
						 eq.dfinished);

		eq.dfinished = false;
		/* Now execute action modules */
		call_modules(&eq.queue[index], last_mod, trigger,
			     EHM_LAST_ELEMENT);
	}

	eod_header.type                  = EHM_SYS_RESTART;
	eod_header.flags.addition_needed = 0;
	eod_header.flags.internal        = 0;
	eod_header.length                = strlen(buf);
	eq.queue[index].write_func(&eod_header, buf);

	/* Free any dynamically allocated modules. */
	/* Avoid double free and overrun concurrency */
	spin_lock_irqsave(&eq.write_lock, flags);
	if (eq.queue[index].mod_type == EXCHND_MOD_DYN) {
		to_free = modules;
		eq.queue[index].modules = NULL;
	}
	spin_unlock_irqrestore(&eq.write_lock, flags);

	kfree(to_free);
}

/**
 * eq_start_handling - Start handling exception
 * @trigger: trigger that was called by the exception
 *
 * Called once the exception information has been filled in. Exceptions of a
 * fatal trigger are handled right here, in the context of the exception;
 * for every other trigger the exception worker thread is woken up to do the
 * handling.
 */
void eq_start_handling(enum exchnd_triggers trigger)
{
	if (exchnd_trigger_list[trigger].fatality != FT_FATAL) {
		/* Do not care if it is already running */
		wake_up(&eq.rd_wait_queue);
		return;
	}

	handle_exc_locally();
}

/**
 * eq_sync_wake_up - Wake up after synchronization with daemon
 *
 * Marks the daemon as finished and wakes up the exception handling thread,
 * which waits on eq.daemon_wait_queue for eq.dfinished to become true while
 * it is synchronizing with the exception handler daemon.
 */
void eq_sync_wake_up(void)
{
	/* Set the wait condition before waking: the waiter re-checks it */
	eq.dfinished = true;
	wake_up(&eq.daemon_wait_queue);
}

/*
 * eq_wake_daemon_wait_queue - Wakeup the daemon wait queue
 *
 * Wakes up the daemon wake queue if the daemon is not there any longer and
 * if the wait queue is active.
 *
 */
void eq_wake_daemon_wait_queue(void)
{
	unsigned long flags;
	unsigned long rd_flags;

	spin_lock_irqsave(&rb_get_read_wait_queue()->lock, rd_flags);
	spin_lock_irqsave(&eq.daemon_wait_queue.lock, flags);
	/* waitqueue_active checked under wq lock. */
	if (!waitqueue_active(rb_get_read_wait_queue()) &&
	    /* waitqueue_active checked under wq lock. */
	    waitqueue_active(&eq.daemon_wait_queue)) {
		pr_err("WD wakes up daemon wait-queue as daemon is dead.\n");
		eq_sync_wake_up();
	}
	spin_unlock_irqrestore(&eq.daemon_wait_queue.lock, flags);
	spin_unlock_irqrestore(&rb_get_read_wait_queue()->lock, rd_flags);
}

/**
 * process_exception_queue - Take one exception from the queue and process it
 * @data: data provided at the thread start by the parent (unused)
 *
 * Main loop of the exception handler module worker thread. It sleeps until
 * the exception queue is filled, then handles the exception at the read
 * index and advances the read index.
 *
 * The read index is only advanced if it was not moved while the exception
 * was being handled: on a queue overrun get_free_exc_slot() sets it to the
 * newly queued entry, and incrementing it on top of that would skip this
 * entry.
 *
 * Return: Result of the thread function (unused)
 */
static int process_exception_queue(void *data)
{
	while (!kthread_should_stop()) {
		unsigned long flags;
		unsigned long rd = eq.rd_idx;
		int index = rd & (CONFIG_EXCHND_QUEUE_SIZE - 1);

		if (eq.queue[index].ready) {
			handle_exception(index);

			/* rd_idx is shared with the producer side */
			spin_lock_irqsave(&eq.write_lock, flags);
			if (eq.rd_idx == rd)
				eq.rd_idx++;
			spin_unlock_irqrestore(&eq.write_lock, flags);
		}
		/* Queue is empty -> wait */
		wait_event_interruptible(eq.rd_wait_queue,
					 ((eq.rd_idx != eq.wr_idx) &&
					  eq.queue[index].ready) ||
					 kthread_should_stop());
	}

	return 0;
}

/**
 * eq_init - Initializes the exception queue
 *
 * Resets the queue indexes, initializes the wait queues and creates and
 * starts the exception worker thread.
 *
 * Return: 0 if successful, otherwise the negative error code reported by
 * the thread creation. In that case eq.worker_thread is left NULL instead
 * of holding an error pointer.
 */
int eq_init(void)
{
	struct task_struct *worker;

	/* Reset exception queue indexes */
	eq.rd_idx = 0;
	eq.wr_idx = 0;

	/* Init wait queues */
	init_waitqueue_head(&eq.rd_wait_queue);
	init_waitqueue_head(&eq.daemon_wait_queue);

	/* Create and start working thread */
	worker = kthread_run(process_exception_queue, NULL, "exc_worker");
	if (IS_ERR(worker)) {
		pr_err("Could not start worker thread\n");
		eq.worker_thread = NULL;
		return PTR_ERR(worker);
	}

	eq.worker_thread = worker;

	return 0;
}

/**
 * eq_deinit - Deinitializes the exception queue
 *
 * Releases a worker that is still waiting for a daemon that is gone, then
 * polls (50 rounds of 50ms, i.e. 2.5s max) until the worker thread sleeps
 * on the read wait queue. The worker thread is only stopped if it was found
 * sleeping there.
 */
void eq_deinit(void)
{
	int rounds_left = 50;
	unsigned long irq_state;
	bool worker_idle;

	/* Release the worker if it waits for a daemon that is gone */
	eq_wake_daemon_wait_queue();

	spin_lock_irqsave(&eq.rd_wait_queue.lock, irq_state);
	for (;;) {
		/* waitqueue_active checked under wq lock. */
		if (waitqueue_active(&eq.rd_wait_queue))
			break;
		if (!rounds_left--)
			break;

		/* Sleeping is not allowed with the lock held */
		spin_unlock_irqrestore(&eq.rd_wait_queue.lock, irq_state);
		msleep(50);
		spin_lock_irqsave(&eq.rd_wait_queue.lock, irq_state);
	}

	/* waitqueue_active checked under wq lock. */
	worker_idle = waitqueue_active(&eq.rd_wait_queue);
	spin_unlock_irqrestore(&eq.rd_wait_queue.lock, irq_state);

	/* Stop the thread. */
	if (worker_idle)
		kthread_stop(eq.worker_thread);
}
