Python版本
v3.9.17
分析代码的过程比较枯燥,可以直接跳转到总结。
只能被其他对象引用的类型
比如:longobject、floatobject
floatobject
以floatobject为例子来分析,先看看结构定义- typedef struct {
- PyObject_HEAD
- double ob_fval;
- } PyFloatObject;
- // 展开PyObject_HEAD后
- typedef struct {
- PyObject ob_base;
- double ob_fval;
- } PyFloatObject;
- typedef struct _object {
- _PyObject_HEAD_EXTRA
- Py_ssize_t ob_refcnt;
- PyTypeObject *ob_type;
- } PyObject;
复制代码 在PyObject中的_PyObject_HEAD_EXTRA,只有在编译时指定--with-trace-refs才有效,这里忽略即可。- ./configure --with-trace-refs
复制代码 可以看到在PyObject里有一个ob_refcnt的属性,这个就是引用计数。
当引用计数减为0时,就会调用该对象类型对应的析构函数(tp_dealloc)。- #define Py_DECREF(op) _Py_DECREF(_PyObject_CAST(op))
- void _Py_Dealloc(PyObject *op)
- {
- destructor dealloc = Py_TYPE(op)->tp_dealloc;
- (*dealloc)(op);
- }
- static inline void _Py_DECREF(PyObject *op)
- {
- if (--op->ob_refcnt != 0) {
- }
- else {
- _Py_Dealloc(op);
- }
- }
复制代码 能引用其他对象的类型
比如listobject,dictobject...
listobject
以listobject为例子来分析,先看看结构定义- typedef struct {
- PyObject_VAR_HEAD
- PyObject **ob_item;
- Py_ssize_t allocated;
- } PyListObject;
- // 展开 PyObject_VAR_HEAD
- typedef struct {
- PyVarObject ob_base;
- PyObject **ob_item;
- Py_ssize_t allocated;
- } PyListObject;
- typedef struct {
- PyObject ob_base;
- Py_ssize_t ob_size; /* Number of items in variable part */
- } PyVarObject;
复制代码 可以看出,PyObject_VAR_HEAD也就比PyObject_HEAD多了一个Py_ssize_t ob_size而已,这个属性用来表示这个变长对象里的元素数量。
因为可以引用其他对象,就有可能出现循环引用:下面的lst1和lst2互相引用,即使外部不再使用它们,两者的引用计数也永远不会减到0。如果只依靠引用计数来做垃圾回收,这部分内存就无法被释放。- lst1 = []
- lst2 = []
- lst1.append(lst2)
- lst2.append(lst1)
复制代码 当然这种情况可以使用弱引用,或者手动解除环引用。这些解决方案这里不深入,现在主要看看python是怎样应对这种情况。
对于这类对象,申请内存时调用的是PyObject_GC_New,而不会引用其他对象的类型(如floatobject)用的是PyObject_MALLOC。为了减少篇幅,下面的代码删掉了一些判断逻辑。- typedef struct {
- // Pointer to next object in the list.
- // 0 means the object is not tracked
- uintptr_t _gc_next;
- // Pointer to previous object in the list.
- // Lowest two bits are used for flags documented later.
- uintptr_t _gc_prev;
- } PyGC_Head;
- #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
- static PyObject * _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
- {
- PyThreadState *tstate = _PyThreadState_GET();
- GCState *gcstate = &tstate->interp->gc;
- size_t size = sizeof(PyGC_Head) + basicsize;
- PyGC_Head *g;
- g = (PyGC_Head *)PyObject_Malloc(size);
- g->_gc_next = 0;
- g->_gc_prev = 0;
- gcstate->generations[0].count++; /* number of allocated GC objects */
- if (/* 判断是否可以执行GC */)
- {
- gcstate->collecting = 1;
- collect_generations(tstate);
- gcstate->collecting = 0;
- }
- PyObject *op = FROM_GC(g);
- return op;
- }
复制代码 对于这类能引用其他对象的类型,python在对象本身的内存之前又加上了一个PyGC_Head(FROM_GC就是跳过这个头部得到对象指针)。通过这个PyGC_Head将listobject链接到gc列表中。
在分配完listobject内存后,紧接着调用_PyObject_GC_TRACK,链接到gc列表中。- static inline void _PyObject_GC_TRACK_impl(const char *filename, int lineno,
- PyObject *op)
- {
- PyGC_Head *gc = _Py_AS_GC(op);
- PyThreadState *tstate = _PyThreadState_GET();
- PyGC_Head *generation0 = tstate->interp->gc.generation0;
- PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
- _PyGCHead_SET_NEXT(last, gc);
- _PyGCHead_SET_PREV(gc, last);
- _PyGCHead_SET_NEXT(gc, generation0);
- generation0->_gc_prev = (uintptr_t)gc;
- }
复制代码 通过这里的变量名,可以猜测使用到了分代垃圾回收。
分代回收
python手动执行垃圾回收一般调用gc.collect(generation=2)函数。- #define NUM_GENERATIONS 3
- #define GC_COLLECT_METHODDEF \
- {"collect", (PyCFunction)(void(*)(void))gc_collect, METH_FASTCALL|METH_KEYWORDS, gc_collect__doc__},
- static PyObject *
- gc_collect(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
- {
- PyObject *return_value = NULL;
- int generation = NUM_GENERATIONS - 1;
- Py_ssize_t _return_value;
- _return_value = gc_collect_impl(module, generation);
- if ((_return_value == -1) && PyErr_Occurred()) {
- goto exit;
- }
- return_value = PyLong_FromSsize_t(_return_value);
- exit:
- return return_value;
- }
复制代码 具体执行在gc_collect_impl函数中,接着往下- static Py_ssize_t gc_collect_impl(PyObject *module, int generation)
- {
- PyThreadState *tstate = _PyThreadState_GET();
- GCState *gcstate = &tstate->interp->gc;
- Py_ssize_t n;
- if (gcstate->collecting) {
- /* already collecting, don't do anything */
- n = 0;
- }
- else {
- gcstate->collecting = 1;
- n = collect_with_callback(tstate, generation);
- gcstate->collecting = 0;
- }
- return n;
- }
复制代码 可以看到,如果已经在执行GC,则直接返回。接着看collect_with_callback- static Py_ssize_t
- collect_with_callback(PyThreadState *tstate, int generation)
- {
- assert(!_PyErr_Occurred(tstate));
- Py_ssize_t result, collected, uncollectable;
- invoke_gc_callback(tstate, "start", generation, 0, 0);
- result = collect(tstate, generation, &collected, &uncollectable, 0);
- invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
- assert(!_PyErr_Occurred(tstate));
- return result;
- }
复制代码 其中invoke_gc_callback是调用通过gc.callbacks注册的回调函数,这里我们忽略,重点分析collect函数。
collect函数签名
这段代码很长,我们拆分开来分析,这里会去除掉一些DEBUG相关的逻辑。- static Py_ssize_t collect(PyThreadState *tstate, int generation,Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail);
复制代码- /* merge younger generations with one we are currently collecting */
- for (i = 0; i < generation; i++) {
- gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
- }
复制代码 比如调用gc.collect(2),就表示对所有代执行一次完整的垃圾回收。这里会将第0、1代的对象合并到第2代上。合并之后第0、1代就空了,全部可GC的对象都在第2代上。
- /* handy references */
- young = GEN_HEAD(gcstate, generation);
- if (generation < NUM_GENERATIONS-1)
- old = GEN_HEAD(gcstate, generation+1);
- else
- old = young;
- validate_list(old, collecting_clear_unreachable_clear);
- deduce_unreachable(young, &unreachable);
复制代码 这里的young指针指向第2代的链表头,validate_list做校验,这里忽略,重点在deduce_unreachable函数中。- static inline void
- deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
- validate_list(base, collecting_clear_unreachable_clear);
- update_refs(base); // gc_prev is used for gc_refs
- subtract_refs(base);
- gc_list_init(unreachable);
- move_unreachable(base, unreachable); // gc_prev is pointer again
- validate_list(base, collecting_clear_unreachable_clear);
- validate_list(unreachable, collecting_set_unreachable_set);
- }
复制代码 首先调用update_refs更新引用计数
[code]static inline voidgc_reset_refs(PyGC_Head *g, Py_ssize_t refs){ g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED) | PREV_MASK_COLLECTING | ((uintptr_t)(refs) |