#include "Python.h"
#ifdef STACKLESS

#include "compile.h"
#include "frameobject.h"
#include "structmember.h"

#include "stackless_impl.h"
#include "stacklesseval.h"
#include "prickelpit.h"

/* platform specific constants */
#include "slp_platformselect.h"

/* Stackless extension for ceval.c */

/******************************************************

  Static Global Variables

*******************************************************/

/* the flag which decides whether we try to use soft switching */

int slp_enable_softswitch = 1;

/*
 * flag whether the next call should try to be stackless.
 * The protocol is: This flag may be only set if the called
 * thing supports it. It doesn't matter whether it uses the
 * chance, but it *must* set it to zero before returning.
 * This flags in a way srves as a parameter that we don't have.
 */
int slp_try_stackless = 0;

/******************************************************

  The C Stack

 ******************************************************/

/* adjust slots to typical size for your system */

#define CSTACK_SLOTS        1024
#define CSTACK_MAXCACHE     100
#define CSTACK_GOODGAP      4096

static PyCStackObject *cstack_cache[CSTACK_SLOTS] = { NULL };
static int cstack_cachecount = 0;

/* this function will get called by PyStacklessEval_Fini */
static void slp_cstack_cacheclear(void)
{
    int i;
    PyCStackObject *stack;
    for (i=0; i < CSTACK_SLOTS; i++) {
        while (cstack_cache[i] != NULL) {
            stack = cstack_cache[i];
            cstack_cache[i] = (PyCStackObject *) stack->startaddr;
            PyMem_Free(stack);
        }
    }
    cstack_cachecount = 0;
}

static void
cstack_dealloc(PyCStackObject *cst)
{
    PyThreadState *ts = PyThreadState_GET();
	ts->st.cstack_chain = cst;
	SLP_CHAIN_REMOVE(PyCStackObject, &ts->st.cstack_chain, cst, next, prev);
    if (cst->ob_size >= CSTACK_SLOTS) {
        PyMem_Free(cst);
    }
    else {
        if (cstack_cachecount >= CSTACK_MAXCACHE)
            slp_cstack_cacheclear();
        cst->startaddr = (intptr_t *) cstack_cache[cst->ob_size];
        cstack_cache[cst->ob_size] = cst;
        ++cstack_cachecount;
    }
}


PyCStackObject *
slp_cstack_new(PyCStackObject **cst, intptr_t *stackref, PyTaskletObject *task)
{
    PyThreadState *ts = PyThreadState_GET();
    intptr_t *stackbase = ts->st.cstack_base;
    ptrdiff_t size = stackbase - stackref;
	if (size < 0) {
        PyErr_SetString(PyExc_RuntimeError, "negative stack size");
        return NULL;
    }
    if (*cst && (*cst)->ob_size == size && (*cst)->ob_refcnt == 1) {
        /* reuse it */
		return *cst;
    }

    if (*cst != NULL) {
        if ((*cst)->task == task)
            (*cst)->task = NULL;
        Py_DECREF(*cst);
    }
    if (size < CSTACK_SLOTS && ((*cst) = cstack_cache[size])) {
        /* take stack from cache */
        cstack_cache[size] = (PyCStackObject *) (*cst)->startaddr;
        --cstack_cachecount;
    }
    else {
        /* PyObject_NewVar is inlined */
        *cst = (PyCStackObject *)
            PyObject_MALLOC(sizeof(PyCStackObject) + (size-1) * sizeof(intptr_t));
        if (*cst == NULL) return NULL;
    }
    (void) PyObject_INIT_VAR(*cst, &PyCStack_Type, (int)size);

    (*cst)->startaddr = stackbase;
	(*cst)->next = (*cst)->prev = NULL;
	SLP_CHAIN_INSERT(PyCStackObject, &ts->st.cstack_chain, *cst, next, prev);
    (*cst)->serial = ts->st.serial;
	(*cst)->task = task;
	(*cst)->tstate = ts;
	(*cst)->nesting_level = ts->st.nesting_level;
    return *cst;
}

size_t
slp_cstack_save(PyCStackObject *cstprev)
{
    size_t stsizeb = (cstprev)->ob_size * sizeof(intptr_t);
    memcpy((cstprev)->stack, (cstprev)->startaddr - (cstprev)->ob_size, stsizeb);
	return stsizeb;
}

void
slp_cstack_restore(PyCStackObject *cst)
{
	/* mark task as no longer responsible for cstack instance */
	cst->task = NULL;
    memcpy(cst->startaddr - cst->ob_size, &cst->stack, (cst->ob_size) * sizeof(intptr_t));
}


static char cstack_doc[] =
"A CStack object serves to save the stack slice which is involved\n\
during a recursive Python call. It will also be used for pickling\n\
of program state. This structure is highly platform dependant.\n\
Note: For inspection, str() can dump it as a string.\
";

#if SIZEOF_VOIDP == SIZEOF_INT
#define T_ADDR T_UINT
#else
#define T_ADDR T_ULONG
#endif


static PyMemberDef cstack_members[] = {
    {"size", T_INT, offsetof(PyCStackObject, ob_size), READONLY},
	{"next", T_OBJECT, offsetof(PyCStackObject, next), READONLY},
	{"prev", T_OBJECT, offsetof(PyCStackObject, prev), READONLY},
	{"task", T_OBJECT, offsetof(PyCStackObject, task), READONLY},
    {"startaddr", T_ADDR, offsetof(PyCStackObject, startaddr), READONLY},
    {0}
};

/* simple string interface for inspection */

static PyObject *
cstack_str(PyObject *o)
{
    PyCStackObject *cst = (PyCStackObject*)o;
    return PyString_FromStringAndSize((char*)&cst->stack, cst->ob_size*sizeof(cst->stack[0]));
}

PyTypeObject PyCStack_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,
	"stackless.cstack",
	sizeof(PyCStackObject),
	sizeof(PyObject *),
	(destructor)cstack_dealloc,         /* tp_dealloc */
	0,                  /* tp_print */
	0,                  /* tp_getattr */
	0,                  /* tp_setattr */
	0,                  /* tp_compare */
	0,                  /* tp_repr */
	0,                  /* tp_as_number */
	0,                  /* tp_as_sequence */
	0,                  /* tp_as_mapping */
	0,                  /* tp_hash */
	0,                  /* tp_call */
	(reprfunc)cstack_str,                   /* tp_str */
	PyObject_GenericGetAttr,        /* tp_getattro */
	PyObject_GenericSetAttr,        /* tp_setattro */
	0,                  /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT, /* tp_flags */
	cstack_doc,         /* tp_doc */
	0,                  /* tp_traverse */
	0,                  /* tp_clear */
	0,                  /* tp_richcompare */
	0,                  /* tp_weaklistoffset */
	0,                  /* tp_iter */
	0,                  /* tp_iternext */
	0,                  /* tp_methods */
	cstack_members,     /* tp_members */
	0,                  /* tp_getset */
	0,                  /* tp_base */
	0,                  /* tp_dict */
	0,                  /* tp_descr_get */
	0,                  /* tp_descr_set */
	0,                  /* tp_dictoffset */
	0,                  /* tp_init */
	0,                  /* tp_alloc */
	0,                  /* tp_new */
	0,                  /* tp_free */
};


static int
make_initial_stub(void)
{
    PyThreadState *ts = PyThreadState_GET();
	if (ts->st.initial_stub != NULL) {
		Py_DECREF(ts->st.initial_stub);
		ts->st.initial_stub = NULL;
	}
	ts->st.serial_last_jump = ++ts->st.serial;
    if (slp_transfer(&ts->st.initial_stub, NULL, NULL)) return -1;
	/*
	 * from here, we always arrive with a compatible cstack
	 * that also can be used by main, if it is running
	 * in soft-switching mode.
	 * To insure that, it was necessary to re-create the
	 * initial stub for *every* run of a new main.
	 * This will vanish with greenlet-like stack management.
	 */

    return 0;
}

static PyObject *
climb_stack_and_eval_frame(PyFrameObject *f)
{
    /*
     * a similar case to climb_stack_and_transfer,
	 * but here we need to incorporate a gap in the
	 * stack into main and keep this gap on the stack.
	 * This way, initial_stub is always valid to be
	 * used to return to the main c stack.
     */
    PyThreadState *ts = PyThreadState_GET();
    intptr_t probe;
    ptrdiff_t needed = &probe - ts->st.cstack_base;
    /* in rare cases, the need might have vanished due to the recursion */
    intptr_t *goobledigoobs;
    if (needed > 0) {
        goobledigoobs = alloca(needed * sizeof(intptr_t));
        if (goobledigoobs == NULL)
            return NULL;
    }
    return slp_eval_frame(f);
}


PyObject *
slp_eval_frame(PyFrameObject *f)
{
    PyThreadState *ts = PyThreadState_GET();
    PyFrameObject *fprev = f->f_back;
    intptr_t *stackref;
    PyObject *result;

    if (fprev == NULL && ts->st.main == NULL) {
        /* this is the initial frame, so mark the stack base */

		/*
			careful, this caused me a major headache.
			it is *not* sufficient to just check for fprev == NULL.
			Reason: (observed with wxPython):
			A toplevel frame is run as a tasklet. When its frame
			is deallocated (in slp_tasklet_end), a Python object
			with a __del__ method is destroyed. This __del__
			will run as a toplevel frame, with fback == NULL!
		 */

        stackref = STACK_REFPLUS + (intptr_t *) &f;
		if (ts->st.cstack_base == NULL)
			ts->st.cstack_base = stackref - CSTACK_GOODGAP; /* XXX */
		if (stackref > ts->st.cstack_base)
			return climb_stack_and_eval_frame(f);

        ts->frame = f;
        if (make_initial_stub())
            return NULL;
        f = ts->frame;
        if (f == NULL)
            return NULL;
        if (f->f_back == NULL) {
            /* this is really a new tasklet or main */
            result = slp_run_tasklet(f);
            return result;
        }
        return slp_frame_dispatch_top(f);
    }
    return slp_frame_dispatch(f, fprev);
}

void slp_kill_tasks_with_stacks(struct _ts *tstate)
{
    PyThreadState *ts = PyThreadState_GET();
	PyObject *exception, *value, *tb;
    if (ts != tstate) {
        /* too bad, can't handle this, give up */
        return;
    }
    PyErr_Fetch(&exception, &value, &tb);
    while (1) {
        PyCStackObject *csfirst = ts->st.cstack_chain, *cs = csfirst;
        PyTaskletObject *t;
		if (cs == NULL)
			break;
        while (cs->task == NULL && cs->next != csfirst) {
            cs = cs->next;
        }
        if (cs->task == NULL)
            return;
        t = cs->task;
        cs->task = NULL;
        PyTasklet_Kill(t);
		PyErr_Clear();
    }
	PyErr_Restore(exception, value, tb);
}

void PyStackless_kill_tasks_with_stacks(void)
{
    slp_kill_tasks_with_stacks(PyThreadState_GET());
}

void
PyStacklessEval_Fini(void)
{
    slp_cstack_cacheclear();
}


/******************************************************

  Generator re-implementation for Stackless

*******************************************************/

typedef struct {
	PyObject_HEAD
	/* The gi_ prefix is intended to remind of generator-iterator. */

	struct _frame *gi_frame;

	/* True if generator is being executed. */
	int gi_running;

	/* List of weak reference. */
	PyObject *gi_weakreflist;
} PyGenObject;

/*
 * Note:
 * Generators are quite a bit slower in Stackless, because
 * we are jumping in and out so much.
 * I had an implementation with no extra cframe, but it
 * was not faster, but considerably slower than this solution.
 */

typedef struct _gen_callback_frame {
	PyBaseFrameObject bf;
	PyGenObject *gen;
} gen_callback_frame;
#define GEN_CALLBACK_FRAME_SIZE ((sizeof(gen_callback_frame)-sizeof(PyBaseFrameObject))/sizeof(PyObject*))

static PyObject* gen_iternext_callback(PyFrameObject *f);

DEF_INVALID_EXEC(gen_iternext_callback)

PyObject *
PyGenerator_New(PyFrameObject *f)
{
	static int initialized = 0;
	PyGenObject *gen = PyObject_GC_New(PyGenObject, &PyGenerator_Type);
	if (gen == NULL) {
		Py_DECREF(f);
		return NULL;
	}
	gen->gi_frame = f;
	gen->gi_running = 0;
	gen->gi_weakreflist = NULL;
	_PyObject_GC_TRACK(gen);

	f->f_back = (PyFrameObject *) slp_baseframe_new(gen_iternext_callback, 0, GEN_CALLBACK_FRAME_SIZE);
	if (f->f_back == NULL) {
		Py_DECREF(gen);
		return NULL;
	}

	/* initialize pickling of running generators */
	if (!initialized) {
        if (slp_register_execute(&PyBaseFrame_Type, "gen_iternext_callback",
			gen_iternext_callback, REF_INVALID_EXEC(gen_iternext_callback)) ) {
			/* ignore reference, this is a very bad error */
			return NULL;
		}
		initialized = 1;
	}

	return (PyObject *)gen;
}

static int
gen_traverse(PyGenObject *gen, visitproc visit, void *arg)
{
	return visit((PyObject *)gen->gi_frame, arg);
}

static void
gen_dealloc(PyGenObject *gen)
{
	_PyObject_GC_UNTRACK(gen);
	if (gen->gi_weakreflist != NULL)
		PyObject_ClearWeakRefs((PyObject *) gen);
	Py_DECREF(gen->gi_frame);
	PyObject_GC_Del(gen);
}

static PyObject *
gen_iternext(PyGenObject *gen)
{
	STACKLESS_GETARG();
	PyThreadState *ts = PyThreadState_GET();
	PyFrameObject *f = gen->gi_frame;
	PyFrameObject *stopframe = ts->frame;

	if (gen->gi_running) {
		PyErr_SetString(PyExc_ValueError,
				"generator already executing");
		return NULL;
	}
	if (f->f_stacktop == NULL)
		return NULL;

	/* Generators always return to their most recent caller, not
	 * necessarily their creator. */
	Py_XINCREF(ts->frame);
	assert(f->f_back != NULL);
	assert(f->f_back->f_back == NULL);
	f->f_back->f_back = ts->frame;

	gen->gi_running = 1;

	Py_INCREF(Py_None);
	ts->st.tempval = Py_None;
	f->f_execute = PyEval_EvalFrame;
	Py_INCREF(gen);
	((gen_callback_frame *) f->f_back)->gen = gen;
	Py_INCREF(f);
	ts->frame = f;
	if (stackless)
		return Py_UnwindToken;
	return slp_frame_dispatch(f, stopframe);
}

static PyObject*
gen_iternext_callback(PyFrameObject *f)
{
	PyThreadState *ts = PyThreadState_GET();
	gen_callback_frame *gcf = (gen_callback_frame*) f;
	PyGenObject *gen = gcf->gen;
	PyObject *result = ts->st.tempval;
	ts->st.tempval = NULL;
	gen->gi_running = 0;

	/* Don't keep the reference to f_back any longer than necessary.  It
	 * may keep a chain of frames alive or it could create a reference
	 * cycle. */
	ts->frame = f->f_back;
	Py_XDECREF(f->f_back);
	f->f_back = NULL;

	f = gen->gi_frame;
	/* If the generator just returned (as opposed to yielding), signal
	 * that the generator is exhausted. */
	if (result == Py_None && f->f_stacktop == NULL) {
		Py_DECREF(result);
		result = NULL;
		/* are we awaited by a for_iter or called by next() ? */
		if (ts->frame->f_execute != PyEval_EvalFrame_iter) {
			/* do the missing part of the next call */
			if (!PyErr_Occurred())
				PyErr_SetNone(PyExc_StopIteration);
		}
	}
	gcf->gen = NULL;
	Py_DECREF(gen);
	return result;
}

static PyObject *
generator_reduce(PyGenObject *gen)
{
    PyObject *tup;
    tup = Py_BuildValue("(O(Oi))",
        &PyGenerator_Type,
        gen->gi_frame,
        gen->gi_running
        );
    return tup;
}

static
PyObject *
generator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
	PyGenObject *gen = NULL;
    PyFrameObject *f;
    int gi_running;

    if (kwds != NULL) {
        PyErr_SetString(PyExc_ValueError, "Keyword parameters not supported for generator_new");
        return NULL;
    }

	if (!PyArg_ParseTuple(args, "O!i:generator",
			&PyFrame_Type, &f, &gi_running))
		return NULL;

	if (!gi_running) {
		if ((f = slp_ensure_new_frame(f)) != NULL) {
			/* PyGenerator_New eats an existing reference */
			if ((gen = (PyGenObject *) PyGenerator_New(f)) == NULL) {
				Py_DECREF(f);
			}
		}
		return (PyObject *) gen;
	}

	gen = PyObject_GC_New(PyGenObject, &PyGenerator_Type);
	if (gen == NULL) {
        Py_DECREF(f);
		return NULL;
	}
	/*
	 * The frame might now be initially unpickled (with PyNone as f_back),
	 * or it is already chained into a tasklet.
	 * Fortunately, we can simply leave it this way:
	 * since gi_running is set, there is no way to continue the
	 * generator without the corresponding tasklet.
	 */
	Py_INCREF(f);
    gen->gi_frame = f;
	gen->gi_running = gi_running;
    gen->gi_weakreflist = NULL;
    _PyObject_GC_TRACK(gen);
	return (PyObject *)gen;
}


static PyMemberDef gen_memberlist[] = {
	{"gi_frame",	T_OBJECT, offsetof(PyGenObject, gi_frame),	RO},
	{"gi_running",	T_INT,	  offsetof(PyGenObject, gi_running),	RO},
	{NULL}	/* Sentinel */
};

static struct PyMethodDef gen_methods[] = {
    {"__reduce__", (PyCFunction)generator_reduce, METH_NOARGS,
     "gi.__reduce__() -- reduce generator for pickling"},
	{NULL,			NULL}	/* Sentinel */
};


PyTypeObject PyGenerator_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"generator",				/* tp_name */
	sizeof(PyGenObject),			/* tp_basicsize */
	0,					/* tp_itemsize */
	/* methods */
	(destructor)gen_dealloc,		/* tp_dealloc */
	0,					/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	0,					/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,					/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_STACKLESS_CALL,/* tp_flags */
 	0,					/* tp_doc */
 	(traverseproc)gen_traverse,		/* tp_traverse */
 	0,					/* tp_clear */
	0,					/* tp_richcompare */
	offsetof(PyGenObject, gi_weakreflist),	/* tp_weaklistoffset */
	(getiterfunc)PyObject_SelfIter,		/* tp_iter */
	(iternextfunc)gen_iternext,		/* tp_iternext */
	gen_methods,					/* tp_methods */
	gen_memberlist,				/* tp_members */
	0,					/* tp_getset */
	0,					/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	generator_new,				/* tp_new */
};


/******************************************************

  Rebirth of software stack avoidance

*******************************************************/


static PyObject *
unwind_repr(PyObject *op)
{
	return PyString_FromString(
		"The invisible unwind token. If you ever should see this,\n"
		"please report the error to tismer@tismer.com"
	);
}

/* dummy deallocator, just in case */
static void unwind_dealloc(PyObject *op) {
}

static PyTypeObject PyUnwindToken_Type = {
	PyObject_HEAD_INIT(&PyUnwindToken_Type)
	0,
	"UnwindToken",
	0,
	0,
	(destructor)unwind_dealloc, /*tp_dealloc*/ /*should never be called*/
	0,		/*tp_print*/
	0,		/*tp_getattr*/
	0,		/*tp_setattr*/
	0,		/*tp_compare*/
	(reprfunc)unwind_repr, /*tp_repr*/
	0,		/*tp_as_number*/
	0,		/*tp_as_sequence*/
	0,		/*tp_as_mapping*/
	0,		/*tp_hash */
};

PyObject *Py_UnwindToken = (PyObject *) &PyUnwindToken_Type;

/*
    the frame dispatcher will execute frames and manage
    the frame stack until the "previous" frame reappears.
    The "Mario" code if you know that game :-)
*/

PyObject *
slp_frame_dispatch(PyFrameObject *f, PyFrameObject *stopframe)
{
    PyObject * result;
    PyThreadState *ts = PyThreadState_GET();

	++ts->st.nesting_level;

/*
    frame protocol:
    If a frame returns the Py_UnwindToken object, this
    indicates that a different frame will be run.
    Semantics of an appearing Py_UnwindToken:
	The true return value is in the f_retval field.
	We always use the topmost tstate frame and bail
    out when we see the frame that issued the
	originating dispatcher call (which may be a NULL frame).
*/

    while (1) {

        result = f->f_execute(f);
        f = ts->frame;
		if (f == stopframe)
			break;
        else if (result != Py_UnwindToken) {
            ts->st.tempval = result;
        }
    }
	--ts->st.nesting_level;
	/* see whether we need to trigger a pending interrupt */
	if (ts->st.flags.pending_irq)
		slp_check_pending_irq();
	if (result == Py_UnwindToken) {
		/* XXX this is a temporary bug fix.
		   The protocol should again be explicit about the result value. */
		result = ts->st.tempval;
		ts->st.tempval = NULL;
	}
    return result;
}

PyObject *
slp_frame_dispatch_top(PyFrameObject *f)
{
    PyObject *result;
    PyThreadState *ts = PyThreadState_GET();

    if (f==NULL) return NULL;

    while (1) {

        result = f->f_execute(f);
        f = ts->frame;
		if (f == NULL)
			break;
        else if (result != Py_UnwindToken) {
            ts->st.tempval = result;
        }
    }
	if (result == Py_UnwindToken) {
		/* XXX this is a temporary bug fix.
		   The protocol should again be explicit about the result value. */
		result = ts->st.tempval;
		ts->st.tempval = NULL;
	}
    return result;
}


#endif /* STACKLESS */