• 功能基础篇7——Python基础数据结构与算法标准库


    基础数据结构与算法

    collections.abc

    容器数据类型抽象基类collections.abcPython标准库

    collections

    容器数据类型,collections为Python标准库

    命名元组

    from collections import namedtuple
    
    Card = namedtuple('card', ['suits', 'number'])
    c1 = Card('红桃', 2)
    print(c1, c1.number, c1.suits)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    from random import choice, shuffle
    from collections import namedtuple
    
    _Card = namedtuple('Card', ['rank', 'suit'])
    
    
    class Poker:
        ranks = [str(n) for n in range(2, 11)] + list('JQKA')
        suits = ['红心', '方片', '梅花', '黑桃']
    
        def __init__(self):
            self._cards = [_Card(rank, suit) for rank in Poker.ranks for suit in Poker.suits]
    
        def __len__(self):
            return self._cards.__len__()
    
        def __getitem__(self, item):
            return self._cards[item]
    
        def __setitem__(self, key, value):
            self._cards[key] = value
    
    
    cards = Poker()
    print(cards[3])
    print(len(cards))
    shuffle(cards)  # 会改变每个索引对应的值,需要__setitem__
    print(cards[3])
    print(cards[:3])
    print(choice(cards))
    print(choice(cards))
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31

    双端队列

    from collections import deque
    
    dq = deque()
    dq.append(1)
    dq.append(2)
    print(dq.popleft())  # 1
    print(dq.pop())  # 2
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7

    计数器

    可哈希

    from collections import Counter
    
    items = ['red', 'blue', 'red', 'green', 'blue', 'blue']
    print(Counter(items))  # Counter({'blue': 3, 'red': 2, 'green': 1})
    
    • 1
    • 2
    • 3
    • 4

    带默认工厂字典

    from collections import defaultdict
    
    s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
    d = defaultdict(list)
    for k, v in s:
        d[k].append(v)  # 会有一个None到[]的自动变换,否则会报错,处理空值
    # default_factory为可调用对象,用于产生一个默认值
    print(sorted(d.items()))  # [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
    
    dd = defaultdict(lambda: 0)
    print(dd['key'])  # 0
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    有序字典

    有序字典中元素排列顺序与插入顺序保持一致,Python3.6及以前dict无序,Python3.7及以后有序

    from collections import OrderedDict
    
    d = dict([('a', 1), ('b', 2), ('c', 3)])
    print(d)
    
    od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
    print(od)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7

    ChainMap

    倒序迭代

    from collections import ChainMap
    
    baseline = {'music': 'bach', 'art': 'rembrandt'}
    adjustments = {'art': 'van gogh', 'opera': 'carmen'}
    cm = ChainMap(adjustments, baseline)
    for i in cm:
        print(i, end=" ")  # music art opera 
    print(list(cm))  # ['music', 'art', 'opera']
    print(cm.get('art'))  # van gogh
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    UserString、UserList、UserDict

    当需要对内置数据类型str、list、dict进行拓展,并且想要覆写其某些标准功能时,应该使用UserDict, UserList, 和UserString,而不是直接继承str、list、dict

    from collections import UserString, UserList, UserDict
    
    us = UserString("abc")
    print(us)
    print(us.data)
    
    ul = UserList([1, 2, 3])
    print(ul)
    print(ul.data)
    
    ud = UserDict([(1, 'a'), (2, 'b'), (3, 'c')])
    print(ud)
    print(ud.data)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    实现一个自动转大写key的字段,初始化、[]设置属性__setitem__、update分别需要重写,若使用UserDict只需重写__setitem__

    class UpperDict(dict):
    
        def __init__(self, data):
            data = {k.upper(): v for k, v in data.items()}
            super().__init__(data)
    
        def __setitem__(self, key, value):
            key = key.upper()
            super().__setitem__(key, value)
    
        def update(self, data):  # 方法 'UpperDict.update()' 的签名与类 'MutableMapping' 中基方法的签名不匹配
            data = {k.upper(): v for k, v in data.items()}
            super().update(data)
    
    
    d = UpperDict({'a': 1})
    d['b'] = 1
    d.update({'c': 1})
    print(d)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    class UpperDict(UserDict):
        def __setitem__(self, key, value):
            key = key.upper()
            super().__setitem__(key, value)
    
    
    d = UpperDict({'a': 1})
    d['b'] = 1
    d.update({'c': 1})
    print(d)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10

    queue

    queue为Python标准库,同步队列,线程安全

    import queue
    
    q = queue.Queue()
    print(q.put("1"))  # None
    print(q.qsize())  # 1
    print(q.get())  # 1
    print(q.put_nowait("2"))  # None
    print(q.get_nowait())  # 2
    
    # 后进先出队列
    lifo_queue = queue.LifoQueue()
    print(lifo_queue.put(1))  # None
    print(lifo_queue.put(2))  # None
    print(lifo_queue.get())  # 2
    
    # 优先队列,最小的先出
    pq = queue.PriorityQueue()
    pq.put(3)
    pq.put(1)
    print(pq.get())  # 1
    pq.put(4)
    pq.put(0)
    print(pq.get())  # 0
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23

    enum

    enum为Python标准库,枚举

    from enum import Enum
    
    
    class Color(Enum):
        RED = 1
        GREEN = 2
        BLUE = 3
    
    
    print(Color.RED.name, Color.RED.value)  # RED 1
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10

    array

    array为Python标准库,数字数组,纯数字操作比list效率高

    from array import array
    
    a = array('l', [1, 2, 3, 4, 5])
    print(a, type(a))
    
    • 1
    • 2
    • 3
    • 4

    heapq

    heapq为Python标准库,堆队列算法(优先队列算法),堆使用小根堆,基于数组实现

    import heapq
    import operator
    
    l = [5, 3, 4, 2, 1]
    
    # 堆化
    heapq.heapify(l)
    print(l)  # [1, 2, 4, 5, 3]
    print(l[0])  # 1 第一个元素为小根堆最小值
    
    # 入堆
    heapq.heappush(l, 6)
    print(l)  # [1, 2, 4, 5, 3, 6]
    
    # 出堆
    print(heapq.heappop(l))  # 1
    
    # 先入后出,比heappush+heappop高效
    print(heapq.heappushpop(l, 0))  # 0
    print(l)  # [2, 3, 4, 5, 6]
    
    # 先出后入,比heappop+heappush高效
    print(heapq.heapreplace(l, 0))  # 2
    print(l)  # [0, 3, 4, 5, 6]
    
    # 获取迭代器最大的n个元素
    print(heapq.nlargest(10, range(100, 200, 5)))  # [195, 190, 185, 180, 175, 170, 165, 160, 155, 150]
    
    # 获取迭代器最小的n个元素
    print(heapq.nsmallest(10, range(100, 200, 5)))  # [100, 105, 110, 115, 120, 125, 130, 135, 140, 145]
    
    
    # 若干有序迭代器元素合并,合并结果为一个生成器,日志合并示例
    log_file1 = ["2023-10-23 10:12:13 [INFO] ...", "2023-10-23 10:12:14 [INFO] ...", "2023-10-23 10:12:15 [INFO] ..."]
    log_file2 = ["2023-10-23 10:12:12 [INFO] ...", "2023-10-23 10:12:14 [DEBUG] ...", "2023-10-23 10:12:14 [INFO] ..."]
    log_file3 = ["2023-10-23 10:12:11 [WARN] ...", "2023-10-23 10:12:18 [INFO] ...", "2023-10-23 10:12:20 [ERROR] ..."]
    
    for line in heapq.merge(log_file1, log_file2, log_file3, key=operator.itemgetter(slice(0, 20))):
        print(line)
    # 2023-10-23 10:12:11 [WARN] ...
    # 2023-10-23 10:12:12 [INFO] ...
    # 2023-10-23 10:12:13 [INFO] ...
    # 2023-10-23 10:12:14 [INFO] ...
    # 2023-10-23 10:12:14 [DEBUG] ...
    # 2023-10-23 10:12:14 [INFO] ...
    # 2023-10-23 10:12:15 [INFO] ...
    # 2023-10-23 10:12:18 [INFO] ...
    # 2023-10-23 10:12:20 [ERROR] ...
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48

    bisect

    bisect为Python标准库,基于二分查找算法定位插入点,模块中函数基于插入点设计,并非使用__eq__查找特定值,而是使用__lt__查找插入点。

    公共参数说明

    • a,列表
    • x,列表元素
    • lo,低位下标
    • hi,高位下标
    • key,比较键

    函数说明

    • bisect.bisect_left(a, x, lo=0, hi=len(a), *, key=None),返回的插入点ip将数组a分为两个切片使得对于左侧切片 all(elem < x for elem in a[lo : ip]) 为真值而对于右侧切片 all(elem >= x for elem in a[ip : hi]) 为真值
    • bisect.bisect_right(a, x, lo=0, hi=len(a), *, key=None),返回的插入点ip将数组a分为两个切片使得对于左侧切片 all(elem <= x for elem in a[lo : ip]) 为真值而对于右则切片 all(elem > x for elem in a[ip : hi]) 为真值
    • bisect.insort_right(a, x, lo=0, hi=len(a), *, key=None),基于bisect_right函数定位插入点然后插入元素,插入算法时间复杂度为O(n)
    • bisect.insort_left(a, x, lo=0, hi=len(a), *, key=None),基于bisect_left函数定位插入点然后插入元素,插入算法时间复杂度为O(n)
    • bisect.bisectbisect.bisect_right等价
    • bisect.insortbisect.insort_right等价
    import bisect
    import operator
    from collections import namedtuple
    
    l = [1, 3, 5, 7, 9]
    
    print(bisect.bisect_right(l, 5))  # 3
    print(bisect.bisect_left(l, 5))  # 2
    print(bisect.bisect_right(l, 4))  # 2
    print(bisect.bisect_left(l, 4))  # 2
    print(bisect.bisect_right(l, 9))  # 5
    print(bisect.bisect_left(l, 9))  # 4
    print(bisect.bisect_right(l, 1))  # 1
    print(bisect.bisect_left(l, 1))  # 0
    print(bisect.bisect_right(l, 0))  # 0
    print(bisect.bisect_left(l, 0))  # 0
    print(bisect.bisect_right(l, 10))  # 5
    print(bisect.bisect_left(l, 10))  # 5
    
    print(bisect.bisect_left(l, 5))  # 2
    bisect.insort_left(l, 5)
    print(l)  # [1, 3, 5, 5, 7, 9] 插入的是第一个5 left会定位所有 满足小于5 的元素的下一个位置  
    
    print(bisect.bisect_right(l, 5))  # 4
    bisect.insort_right(l, 5)
    print(l)  # [1, 3, 5, 5, 5, 7, 9] 插入的是第三个5 right会定位所有 满足大于5 的元素的上一个位置
    
    # 若数组无序则不会正常工作
    a = [1, 5, 3, 6]
    print(bisect.bisect_left(a, 5))  # 3
    bisect.bisect_left(a, 5)  # [1, 5, 3, 6]
    print(a)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32

    基于定位插入点特性便于分段

    import bisect
    import operator
    from collections import namedtuple
    
    for score in [33, 99, 77, 70, 89, 90, 100]:
        print('FDCBA'[bisect.bisect([60, 70, 80, 90], score)])
    # F A C C B A A
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7

    基于定位插入点特性不便于查找特定元素,通常需要封装一层函数

    import bisect
    import operator
    from collections import namedtuple
    
    Movie = namedtuple('Movie', ('id', 'name', 'released', 'director'))
    
    movies = [
        Movie(1, 'Jaws', 1975, 'Spielberg'),
        Movie(3, 'Titanic', 1997, 'Cameron'),
        Movie(5, 'The Birds', 1963, 'Hitchcock'),
        Movie(7, 'Aliens', 1986, 'Cameron')
    ]
    
    print(bisect.bisect_left(movies, 2, key=operator.attrgetter('id')))  # 1
    print(bisect.bisect_left(movies, 3, key=operator.attrgetter('id')))  # 1
    print(bisect.bisect_right(movies, 3, key=operator.attrgetter('id')))  # 2
    
    
    def index(a, x, key=None):
        i = bisect.bisect_left(a, x, key=key)
        if i != len(a) and (key(a[i]) if key else a[i]) == x:
            return i
        raise ValueError
    
    
    def find_lt(a, x, key=None):
        i = bisect.bisect_left(a, x, key=key)
        if i:
            return a[i - 1]
        raise ValueError
    
    
    def find_le(a, x, key=None):
        i = bisect.bisect_right(a, x, key=key)
        if i:
            return a[i - 1]
        raise ValueError
    
    
    def find_gt(a, x, key=None):
        i = bisect.bisect_right(a, x, key=key)
        if i != len(a):
            return a[i]
        raise ValueError
    
    
    def find_ge(a, x, key=None):
        i = bisect.bisect_left(a, x, key=key)
        if i != len(a):
            return a[i]
        raise ValueError
    
    
    print(index(movies, 5, key=operator.attrgetter('id')))  # 2
    print(index(movies, 2, key=operator.attrgetter('id')))  # ValueError
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55

    三方有序容器库,https://grantjenks.com/docs/sortedcollections/

    浅拷贝与深拷贝,shallow copy and deep copy

    import copy
    
    l = [1, 2, [3]]
    copy_copy = copy.copy(l)
    copy_copy[0] = '1a'
    copy_copy[2][0] = '3a'
    print(l, copy_copy)  # [1, 2, ['3a']] ['1a', 2, ['3a']]
    
    copy_deepcopy = copy.deepcopy(copy_copy)
    copy_deepcopy[0] = 1
    copy_deepcopy[2][0] = 3
    print(copy_copy, copy_deepcopy)  # ['1a', 2, ['3a']] [1, 2, [3]]
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
  • 相关阅读:
    python中.txt文件的使用【txt读取和写入】
    沉睡者 - 怎么样可以在网络上挣钱,告诉你网上挣钱的5种方法!
    第9章:React Hooks
    Hikvison对接NVR实现WEB无插件开发包实现前端视频预览(html、vue、nginx代理)
    pycharm 包安装失败,换源下载,一行命令
    后台管理-----搜索框重置
    iMovie for Mac v10.3.9(视频剪辑)
    干货 | Elasticsearch 8.X 实战视频合集(80 小时+)
    Nginx超时配置
    【论文阅读】DALL·E: Zero-Shot Text-to-Image Generation
  • 原文地址:https://blog.csdn.net/UZDW_/article/details/134000563