*说明
如果需要用到这些知识却没有掌握,则会让人感到沮丧,也可能导致面试被拒。无论是花几天时间“突击”,还是利用零碎的时间持续学习,在数据结构上下点功夫都是值得的。那么Python 中有哪些数据结构呢?列表、字典、集合,还有……栈?Python 有栈吗?本系列文章将给出详细拼图。
第5章:Searching 和 Sorting
排序和查找是最基础和频繁的操作,python内置了in操作符和bisect二分操作模块实现查找,内置了sorted方法来实现排序操作。二分和快排也是面试中经常考到的,本章讲的是基本的排序和查找。
def binary_search(sorted_seq, val):""" 实现标准库中的bisect.bisect_left """low = 0high = len(sorted_seq) - 1while low <= high:mid = (high + low) // 2if sorted_seq[mid] == val:return midelif val < sorted_seq[mid]:high = mid - 1else:low = mid + 1return lowdef bubble_sort(seq): # O(n^2), n(n-1)/2 = 1/2(n^2 + n)n = len(seq)for i in range(n-1):for j in range(n-1-i): # 这里之所以 n-1 还需要 减去 i 是因为每一轮冒泡最大的元素都会冒泡到最后,无需再比较if seq[j] > seq[j+1]:seq[j], seq[j+1] = seq[j+1], seq[j]def select_sort(seq):"""可以看作是冒泡的改进,每次找一个最小的元素交换,每一轮只需要交换一次"""n = len(seq)for i in range(n-1):min_idx = i # assume the ith element is the smallestfor j in range(i+1, n):if seq[j] < seq[min_idx]: # find the minist element indexmin_idx = jif min_idx != i: # swapseq[i], seq[min_idx] = seq[min_idx], seq[i]def insertion_sort(seq):""" 每次挑选下一个元素插入已经排序的数组中,初始时已排序数组只有一个元素"""n = len(seq)for i in range(1, n):value = seq[i] # save the value to be positioned# find the position where value fits in the ordered part of the listpos = iwhile pos > 0 and value < seq[pos-1]:# Shift the items to the right during the searchseq[pos] = seq[pos-1]pos -= 1seq[pos] = valuedef merge_sorted_list(listA, listB):""" 归并两个有序数组 """new_list = list()a = b = 0while a < len(listA) and b < len(listB):if listA[a] < listB[b]:new_list.append(listA[a])a += 1else:new_list.append(listB[b])b += 1while a < len(listA):new_list.append(listA[a])a += 1while b < len(listB):new_list.append(listB[b])b += 1return new_list
第6章: Linked Structure
list是最常用的数据结构,但是list在中间增减元素的时候效率会很低,这时候linked list会更适合,缺点就是获取元素的平均时间复杂度变成了O(n)
# 单链表实现 class ListNode:def __init__(self, data):self.data = dataself.next = Nonedef travsersal(head, callback):curNode = headwhile curNode is not None:callback(curNode.data)curNode = curNode.nextdef unorderdSearch(head, target):curNode = headwhile curNode is not None and curNode.data != target:curNode = curNode.nextreturn curNode is not None# Given the head pointer, prepend an item to an unsorted linked list. def prepend(head, item):newNode = ListNode(item)newNode.next = headhead = newNode# Given the head reference, remove a target from a linked list def remove(head, target):predNode = NonecurNode = headwhile curNode is not None and curNode.data != target:# 寻找目标predNode = curNodecurNode = curNode.dataif curNode is not None:if curNode is head:head = curNode.nextelse:predNode.next = curNode.next
第7章:Stacks
栈也是计算机里用得比较多的数据结构,栈是一种后进先出的数据结构,可以理解为往一个桶里放盘子,先放进去的会被压在地下,拿盘子的时候,后放的会被先拿出来。
class Stack:""" Stack ADT, using a python listStack()isEmpty()length()pop(): assert not emptypeek(): assert not empty, return top of non-empty stack without removing itpush(item)"""def __init__(self):self._items = list()def isEmpty(self):return len(self) == 0def __len__(self):return len(self._items)def peek(self):assert not self.isEmpty()return self._items[-1]def pop(self):assert not self.isEmpty()return self._items.pop()def push(self, item):self._items.append(item)class Stack:""" Stack ADT, use linked list使用list实现很简单,但是如果涉及大量push操作,list的空间不够时复杂度退化到O(n)而linked list可以保证最坏情况下仍是O(1)"""def __init__(self):self._top = None # top节点, _StackNode or Noneself._size = 0 # intdef isEmpty(self):return self._top is Nonedef __len__(self):return self._sizedef peek(self):assert not self.isEmpty()return self._top.itemdef pop(self):assert not self.isEmpty()node = self._topself.top = self._top.nextself._size -= 1return node.itemdef _push(self, item):self._top = _StackNode(item, self._top)self._size += 1class _StackNode:def __init__(self, item, link):self.item = itemself.next = link
第8章:Queues
队列也是经常使用的数据结构,比如发送消息等,celery可以使用redis提供的list实现消息队列。 本章我们用list和linked list来实现队列和优先级队列。
class Queue:""" Queue ADT, use list。list实现,简单但是push和pop效率最差是O(n)Queue()isEmpty()length()enqueue(item)dequeue()"""def __init__(self):self._qList = list()def isEmpty(self):return len(self) == 0def __len__(self):return len(self._qList)def enquue(self, item):self._qList.append(item)def dequeue(self):assert not self.isEmpty()return self._qList.pop(0)from array import Array # Array那一章实现的Array ADT class Queue:"""circular Array ,通过头尾指针实现。list内置append和pop复杂度会退化,使用环数组实现可以使得入队出队操作时间复杂度为O(1),缺点是数组长度需要固定。"""def __init__(self, maxSize):self._count = 0self._front = 0self._back = maxSize - 1self._qArray = Array(maxSize)def isEmpty(self):return self._count == 0def isFull(self):return self._count == len(self._qArray)def __len__(self):return len(self._count)def enqueue(self, item):assert not self.isFull()maxSize = len(self._qArray)self._back = (self._back + 1) % maxSize # 移动尾指针self._qArray[self._back] = itemself._count += 1def dequeue(self):assert not self.isFull()item = self._qArray[self._front]maxSize = len(self._qArray)self._front = (self._front + 1) % maxSizeself._count -= 1return itemclass _QueueNode:def __init__(self, item):self.item = itemclass Queue:""" Queue ADT, linked list 实现。为了改进环型数组有最大数量的限制,改用带有头尾节点的linked list实现。"""def __init__(self):self._qhead = Noneself._qtail = Noneself._qsize = 0def isEmpty(self):return self._qhead is Nonedef __len__(self):return self._countdef enqueue(self, item):node = _QueueNode(item) # 创建新的节点并用尾节点指向他if self.isEmpty():self._qhead = nodeelse:self._qtail.next = nodeself._qtail = nodeself._qcount += 1def dequeue(self):assert not self.isEmpty(), 'Can not dequeue from an empty queue'node = self._qheadif self._qhead is self._qtail:self._qtail = Noneself._qhead = self._qhead.next # 前移头节点self._count -= 1return node.itemclass UnboundedPriorityQueue:""" PriorityQueue ADT: 给每个item加上优先级p,高优先级先dequeue分为两种:- bounded PriorityQueue: 限制优先级在一个区间[0...p)- unbounded PriorityQueue: 不限制优先级PriorityQueue()BPriorityQueue(numLevels): create a bounded PriorityQueue with priority in range[0, numLevels-1]isEmpty()length()enqueue(item, priority): 如果是bounded PriorityQueue, priority必须在区间内dequeue(): 最高优先级的出队,同优先级的按照FIFO顺序- 两种实现方式:1.入队的时候都是到队尾,出队操作找到最高优先级的出队,出队操作O(n)2.始终维持队列有序,每次入队都找到该插入的位置,出队操作是O(1)(注意如果用list实现list.append和pop操作复杂度会因内存分配退化)"""from collections import namedtuple_PriorityQEntry = namedtuple('_PriorityQEntry', 'item, priority')# 采用方式1,用内置list实现unbounded PriorityQueuedef __init__(self):self._qlist = list()def isEmpty(self):return len(self) == 0def __len__(self):return len(self._qlist)def enqueue(self, item, priority):entry = UnboundedPriorityQueue._PriorityQEntry(item, priority)self._qlist.append(entry)def deque(self):assert not self.isEmpty(), 'can not deque from an empty queue'highest = self._qlist[0].priorityfor i in range(len(self)): # 出队操作O(n),遍历找到最高优先级if self._qlist[i].priority < highest:highest = self._qlist[i].priorityentry = self._qlist.pop(highest)return entry.itemclass BoundedPriorityQueue:""" BoundedPriorityQueue ADT,用linked list实现。上一个地方提到了 BoundedPriorityQueue但是为什么需要 BoundedPriorityQueue呢? BoundedPriorityQueue 的优先级限制在[0, maxPriority-1]对于 UnboundedPriorityQueue,出队操作由于要遍历寻找优先级最高的item,所以平均是O(n)的操作,但是对于 BoundedPriorityQueue,用队列数组实现可以达到常量时间,用空间换时间。比如要弹出一个元素,直接找到第一个非空队列弹出 元素就可以了。(小数字代表高优先级,先出队)qlist[0] -> ["white"][1][2] -> ["black", "green"][3] -> ["purple", "yellow"]"""# Implementation of the bounded Priority Queue ADT using an array of ## queues in which the queues are implemented using a linked list.from array import Array # 第二章定义的ADTdef __init__(self, numLevels):self._qSize = 0self._qLevels = Array(numLevels)for i in range(numLevels):self._qLevels[i] = Queue() # 上一节讲到用linked list实现的Queuedef isEmpty(self):return len(self) == 0def __len__(self):return len(self._qSize)def enqueue(self, item, priority):assert priority >= 0 and priority < len(self._qLevels), 'invalid priority'self._qLevel[priority].enquue(item) # 直接找到 priority 对应的槽入队def deque(self):assert not self.isEmpty(), 'can not deque from an empty queue'i = 0p = len(self._qLevels)while i < p and not self._qLevels[i].isEmpty(): # 找到第一个非空队列i += 1return self._qLevels[i].dequeue()