aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2003-11-12 14:32:26 +0000
committerRaymond Hettinger <python@rcn.com>2003-11-12 14:32:26 +0000
commitad983e79d6f215235d205245c2599620e33cf719 (patch)
treef6dcc71e1d751c52d72b095806143a692af7d2b0 /Modules/itertoolsmodule.c
parentMake Message.__str__ more efficient. (diff)
downloadcpython-ad983e79d6f215235d205245c2599620e33cf719.tar.gz
cpython-ad983e79d6f215235d205245c2599620e33cf719.tar.bz2
cpython-ad983e79d6f215235d205245c2599620e33cf719.zip
Improve the implementation of itertools.tee().
Formerly, underlying queue was implemented in terms of two lists. The new queue is a series of singly-linked fixed length lists. The new implementation runs much faster, supports multi-way tees, and allows tees of tees without additional memory costs. The root ideas for this structure were contributed by Andrew Koenig and Guido van Rossum.
Diffstat (limited to 'Modules/itertoolsmodule.c')
-rw-r--r--Modules/itertoolsmodule.c380
1 files changed, 199 insertions, 181 deletions
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index be2d7356fc8..a341a6630a2 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -7,122 +7,102 @@
All rights reserved.
*/
-/* independent iterator object supporting the tee object ***************/
-
-/* The tee object maintains a queue of data seen by the leading iterator
- but not seen by the trailing iterator. When the leading iterator
- gets data from PyIter_Next() it appends a copy to the inbasket stack.
- When the trailing iterator needs data, it is popped from the outbasket
- stack. If the outbasket stack is empty, then it is filled from the
- inbasket (i.e. the queue is implemented using two stacks so that only
- O(n) operations like append() and pop() are used to access data and
- calls to reverse() never move any data element more than once).
-
- If one of the independent iterators gets deallocated, it sets tee's
- save_mode to zero so that future calls to PyIter_Next() stop getting
- saved to the queue (because there is no longer a second iterator that
- may need the data).
+/* tee object and with supporting function and objects ***************/
+
+/* The teedataobject pre-allocates space for LINKCELLS number of objects.
+ To help the object fit neatly inside cache lines (space for 16 to 32
+ pointers), the value should be a multiple of 16 minus space for
+ the other structure members including PyHEAD overhead. The larger the
+ value, the less memory overhead per object and the less time spent
+ allocating/deallocating new links. The smaller the number, the less
+ wasted space and the more rapid freeing of older data.
*/
+#define LINKCELLS 57
typedef struct {
PyObject_HEAD
PyObject *it;
- PyObject *inbasket;
- PyObject *outbasket;
- int save_mode;
- int num_seen;
-} teeobject;
+ int numread;
+ PyObject *nextlink;
+ PyObject *(values[LINKCELLS]);
+} teedataobject;
typedef struct {
PyObject_HEAD
- teeobject *tee;
- int num_seen;
-} iiobject;
+ teedataobject *dataobj;
+ int index;
+} teeobject;
-static PyTypeObject ii_type;
+static PyTypeObject teedataobject_type;
static PyObject *
-ii_next(iiobject *lz)
+teedataobject_new(PyObject *it)
{
- teeobject *to = lz->tee;
- PyObject *result, *tmp;
- int n;
+ teedataobject *tdo;
- if (lz->num_seen == to->num_seen) {
- /* This instance is leading, use iter to get more data */
- result = PyIter_Next(to->it);
- if (result == NULL)
- return NULL;
- if (to->save_mode)
- if(PyList_Append(to->inbasket, result) == -1){
- Py_DECREF(result);
- return NULL;
- }
- to->num_seen++;
- lz->num_seen++;
- return result;
- }
+ tdo = PyObject_New(teedataobject, &teedataobject_type);
+ if (tdo == NULL)
+ return NULL;
- /* This instance is trailing, get data from the queue */
- if (PyList_GET_SIZE(to->outbasket) == 0) {
- /* outbasket is empty, so refill from the inbasket */
- tmp = to->outbasket;
- to->outbasket = to->inbasket;
- to->inbasket = tmp;
- PyList_Reverse(to->outbasket);
- }
+ tdo->numread = 0;
+ tdo->nextlink = NULL;
+ Py_INCREF(it);
+ tdo->it = it;
+ return (PyObject *)tdo;
+}
- /* Pop result from to->outbasket */
- n = PyList_GET_SIZE(to->outbasket);
- assert(n>0);
- result = PyList_GET_ITEM(to->outbasket, n-1);
- Py_INCREF(result);
- if (PyList_SetSlice(to->outbasket, n-1, n, NULL) == -1) {
- Py_DECREF(result);
- return NULL;
- }
- lz->num_seen++;
- return result;
+static PyObject *
+teedataobject_jumplink(teedataobject *tdo)
+{
+ if (tdo->nextlink == NULL)
+ tdo->nextlink = teedataobject_new(tdo->it);
+ Py_INCREF(tdo->nextlink);
+ return tdo->nextlink;
}
-static void
-ii_dealloc(iiobject *ii)
+static PyObject *
+teedataobject_getitem(teedataobject *tdo, int i)
{
- teeobject *to = ii->tee;
- int n;
-
- PyObject_GC_UnTrack(ii);
- ii->tee->save_mode = 0; /* Stop saving data */
- if (ii->num_seen < to->num_seen) {
- /* It is the trailing iterator being freed; this means
- that the stored queue data is no longer needed */
- n = PyList_GET_SIZE(to->inbasket);
- PyList_SetSlice(to->inbasket, 0, n, NULL);
- n = PyList_GET_SIZE(to->outbasket);
- PyList_SetSlice(to->outbasket, 0, n, NULL);
+ PyObject *value;
+
+ assert(i < LINKCELLS);
+ if (i < tdo->numread)
+ value = tdo->values[i];
+ else {
+ /* this is the lead iterator, so fetch more data */
+ assert(i == tdo->numread);
+ value = PyIter_Next(tdo->it);
+ if (value == NULL)
+ return NULL;
+ tdo->numread++;
+ tdo->values[i] = value;
}
- Py_XDECREF(ii->tee);
- PyObject_GC_Del(ii);
+ Py_INCREF(value);
+ return value;
}
-static int
-ii_traverse(iiobject *ii, visitproc visit, void *arg)
+static void
+teedataobject_dealloc(teedataobject *tdo)
{
- if (ii->tee)
- return visit((PyObject *)(ii->tee), arg);
- return 0;
+ int i;
+
+ for (i=0 ; i<tdo->numread ; i++)
+ Py_DECREF(tdo->values[i]);
+ Py_XDECREF(tdo->it);
+ Py_XDECREF(tdo->nextlink);
+ PyObject_Del(tdo);
}
-PyDoc_STRVAR(ii_doc, "Independent iterator created by tee(iterable).");
+PyDoc_STRVAR(teedataobject_doc, "Data container common to multiple tee objects.");
-static PyTypeObject ii_type = {
+static PyTypeObject teedataobject_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
- "itertools.tee_iterator", /* tp_name */
- sizeof(iiobject), /* tp_basicsize */
+ "itertools.tee_dataobject", /* tp_name */
+ sizeof(teedataobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
- (destructor)ii_dealloc, /* tp_dealloc */
+ (destructor)teedataobject_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
@@ -137,112 +117,96 @@ static PyTypeObject ii_type = {
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
- ii_doc, /* tp_doc */
- (traverseproc)ii_traverse, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)ii_next, /* tp_iternext */
- 0, /* tp_methods */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ teedataobject_doc, /* tp_doc */
+ 0, /* tp_traverse */
};
-/* tee object **********************************************************/
static PyTypeObject tee_type;
static PyObject *
-tee_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+tee_next(teeobject *to)
{
- PyObject *it = NULL;
- PyObject *iterable;
- PyObject *inbasket = NULL, *outbasket = NULL, *result = NULL;
- teeobject *to = NULL;
- int i;
+ PyObject *value, *link;
- if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable))
+ if (to->index >= LINKCELLS) {
+ link = teedataobject_jumplink(to->dataobj);
+ Py_XDECREF(to->dataobj);
+ to->dataobj = (teedataobject *)link;
+ to->index = 0;
+ }
+ value = teedataobject_getitem(to->dataobj, to->index);
+ if (value == NULL)
return NULL;
+ to->index++;
+ return value;
+}
+
+static PyObject *
+tee_copy(teeobject *to)
+{
+ teeobject *newto;
+
+ newto = PyObject_New(teeobject, &tee_type);
+ if (newto == NULL)
+ return NULL;
+ Py_INCREF(to->dataobj);
+ newto->dataobj = to->dataobj;
+ newto->index = to->index;
+ return (PyObject *)newto;
+}
+
+PyDoc_STRVAR(teecopy_doc, "Returns an independent iterator.");
+
+static PyObject *
+tee_fromiterable(PyObject *iterable)
+{
+ teeobject *to;
+ PyObject *it = NULL;
it = PyObject_GetIter(iterable);
- if (it == NULL) goto fail;
-
- inbasket = PyList_New(0);
- if (inbasket == NULL) goto fail;
-
- outbasket = PyList_New(0);
- if (outbasket == NULL) goto fail;
-
- to = PyObject_GC_New(teeobject, &tee_type);
- if (to == NULL) goto fail;
-
- to->it = it;
- to->inbasket = inbasket;
- to->outbasket = outbasket;
- to->save_mode = 1;
- to->num_seen = 0;
- PyObject_GC_Track(to);
-
- /* create independent iterators */
- result = PyTuple_New(2);
- if (result == NULL) goto fail;
- for (i=0 ; i<2 ; i++) {
- iiobject *indep_it = PyObject_GC_New(iiobject, &ii_type);
- if (indep_it == NULL) goto fail;
- Py_INCREF(to);
- indep_it->tee = to;
- indep_it->num_seen = 0;
- PyObject_GC_Track(indep_it);
- PyTuple_SET_ITEM(result, i, (PyObject *)indep_it);
+ if (it == NULL)
+ return NULL;
+ if (PyObject_TypeCheck(it, &tee_type)) {
+ to = (teeobject *)tee_copy((teeobject *)it);
+ goto done;
}
- goto succeed;
-fail:
+
+ to = PyObject_New(teeobject, &tee_type);
+ if (to == NULL)
+ goto done;
+ to->dataobj = (teedataobject *)teedataobject_new(it);
+ to->index = 0;
+done:
Py_XDECREF(it);
- Py_XDECREF(inbasket);
- Py_XDECREF(outbasket);
- Py_XDECREF(result);
-succeed:
- Py_XDECREF(to);
- return result;
+ return (PyObject *)to;
}
-static void
-tee_dealloc(teeobject *to)
+static PyObject *
+tee_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
- PyObject_GC_UnTrack(to);
- Py_XDECREF(to->inbasket);
- Py_XDECREF(to->outbasket);
- Py_XDECREF(to->it);
- PyObject_GC_Del(to);
+ PyObject *iterable;
+
+ if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable))
+ return NULL;
+ return tee_fromiterable(iterable);
}
-static int
-tee_traverse(teeobject *to, visitproc visit, void *arg)
+static void
+tee_dealloc(teeobject *to)
{
- int err;
-
- if (to->it) {
- err = visit(to->it, arg);
- if (err)
- return err;
- }
- if (to->inbasket) {
- err = visit(to->inbasket, arg);
- if (err)
- return err;
- }
- if (to->outbasket) {
- err = visit(to->outbasket, arg);
- if (err)
- return err;
- }
- return 0;
+ Py_XDECREF(to->dataobj);
+ PyObject_Del(to);
}
-PyDoc_STRVAR(tee_doc,
-"tee(iterable) --> (it1, it2)\n\
-\n\
-Split the iterable into two independent iterables.");
+PyDoc_STRVAR(teeobject_doc,
+"Iterator wrapped to make it copyable");
+
+static PyMethodDef tee_methods[] = {
+ {"__copy__", (PyCFunction)tee_copy, METH_NOARGS, teecopy_doc},
+ {NULL, NULL} /* sentinel */
+};
static PyTypeObject tee_type = {
PyObject_HEAD_INIT(NULL)
@@ -266,15 +230,15 @@ static PyTypeObject tee_type = {
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
- tee_doc, /* tp_doc */
- (traverseproc)tee_traverse, /* tp_traverse */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ teeobject_doc, /* tp_doc */
+ 0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- 0, /* tp_methods */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)tee_next, /* tp_iternext */
+ tee_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
@@ -285,8 +249,52 @@ static PyTypeObject tee_type = {
0, /* tp_init */
0, /* tp_alloc */
tee_new, /* tp_new */
+ PyObject_Del, /* tp_free */
};
+static PyObject *
+tee(PyObject *self, PyObject *args)
+{
+ int i, n=2;
+ PyObject *it, *iterable, *copyable, *result;
+
+ if (!PyArg_ParseTuple(args, "O|i", &iterable, &n))
+ return NULL;
+ result = PyTuple_New(n);
+ if (result == NULL)
+ return NULL;
+ if (n == 0)
+ return result;
+ it = PyObject_GetIter(iterable);
+ if (it == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ if (!PyObject_HasAttrString(it, "__copy__")) {
+ copyable = tee_fromiterable(it);
+ Py_DECREF(it);
+ if (copyable == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ } else
+ copyable = it;
+ PyTuple_SET_ITEM(result, 0, copyable);
+ for (i=1 ; i<n ; i++) {
+ copyable = PyObject_CallMethod(copyable, "__copy__", NULL);
+ if (copyable == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ PyTuple_SET_ITEM(result, i, copyable);
+ }
+ return result;
+}
+
+PyDoc_STRVAR(tee_doc,
+"tee(iterable, n=2) --> tuple of n independent iterators.");
+
+
/* cycle object **********************************************************/
typedef struct {
@@ -2091,13 +2099,18 @@ islice(seq, [start,] stop [, step]) --> elements from\n\
seq[start:stop:step]\n\
imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\
starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
-tee(it) --> (it1, it2) splits one iterator into two \n\
+tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\
");
+static PyMethodDef module_methods[] = {
+ {"tee", (PyCFunction)tee, METH_VARARGS, tee_doc},
+ {NULL, NULL} /* sentinel */
+};
+
PyMODINIT_FUNC
inititertools(void)
{
@@ -2105,7 +2118,6 @@ inititertools(void)
PyObject *m;
char *name;
PyTypeObject *typelist[] = {
- &tee_type,
&cycle_type,
&dropwhile_type,
&takewhile_type,
@@ -2121,7 +2133,7 @@ inititertools(void)
NULL
};
- m = Py_InitModule3("itertools", NULL, module_doc);
+ m = Py_InitModule3("itertools", module_methods, module_doc);
for (i=0 ; typelist[i] != NULL ; i++) {
if (PyType_Ready(typelist[i]) < 0)
@@ -2131,4 +2143,10 @@ inititertools(void)
Py_INCREF(typelist[i]);
PyModule_AddObject(m, name+1, (PyObject *)typelist[i]);
}
+
+ if (PyType_Ready(&teedataobject_type) < 0)
+ return;
+ if (PyType_Ready(&tee_type) < 0)
+ return;
+
}