根据之前发布的思路 第一步 进行数据合并
- import pandas as pd
-
- # 读取所有附件的数据
- data1 = pd.read_excel('附件一.xlsx')
- data2 = pd.read_excel('附件二.xlsx')
- data3 = pd.read_excel('附件三.xlsx')
- data4 = pd.read_excel('附件四.xlsx')
-
- # 根据商品编码将附件一和附件二连接
- combinedData = pd.merge(data1, data2, on='商品编码', how='inner')
-
- # 根据商家编码将上述的结果和附件三连接
- combinedData = pd.merge(combinedData, data3, on='商家编码', how='inner')
-
- # 根据仓库编码将上述的结果和附件四连接
- combinedData = pd.merge(combinedData, data4, on='仓库编码', how='inner')
-
- # 保存合并后的数据到新的Excel文件中
- combinedData.to_excel('合并后的数据.xlsx', index=False, engine='openpyxl')
或者 matlab
- % 读取所有附件的数据
- data1 = readtable('附件一.xlsx');
- data2 = readtable('附件二.xlsx');
- data3 = readtable('附件三.xlsx');
- data4 = readtable('附件四.xlsx');
-
- % 根据商品编码将附件一和附件二连接
- combinedData = innerjoin(data1, data2, 'Keys', '商品编码');
-
- % 根据商家编码将上述的结果和附件三连接
- combinedData = innerjoin(combinedData, data3, 'Keys', '商家编码');
-
- % 根据仓库编码将上述的结果和附件四连接
- combinedData = innerjoin(combinedData, data4, 'Keys', '仓库编码');
-
- % 保存合并后的数据到新的Excel文件中
- writetable(combinedData, '合并后的数据.xlsx');
之后就是对于数据的 转码 将文本数据转化为数字
py代码
- # 定义数据
- data = {
- "商家编码": {"1": "seller_10", "2": "seller_11", "3": "seller_12", "4": "seller_13", ...},
- "商品编码": {"1": "product_1914", "2": "product_1915", "3": "product_1916", ...},
- "仓库编码": {"1": "wh_1", "2": "wh_10", "3": "wh_11", ...},
- "商品一级分类": {"1": "手机通讯", "2": "食品饮料", "3": "家庭清洁/纸品", ...},
- "商品二级分类": {"1": "手机配件", "2": "粮油调味", "3": "进口食品", ...},
- "商品三级分类": {"1": "手机配件_5", "2": "粮油调味_1", "3": "进口食品_1", ...},
- "商家分类": {"1": "手机通讯", "2": "食品饮料", "3": "美妆护肤", ...},
- "库存分类": {"1": "A", "2": "B", "3": "D", ...},
- "商家规模": {"1": "Large", "2": "Special", "3": "Small", ...},
- "仓库类别": {"1": "Large", "2": "Special", "3": "Small", ...}
- }
-
- def transcode(record):
- for key, value in record.items():
- if value in data[key]:
- record[key] = data[key][value]
- return record
-
- # 示例
- record = {
- "商家编码": "1",
- "商品编码": "2",
- "仓库编码": "3",
- ...
- }
-
- transcoded_record = transcode(record)
- print(transcoded_record)
matalb
- % 假设原始数据存储在一个cell array中,名为data
- % data = {...}; % 你需要将这里填充为你的数据
-
- % 定义一个map来存储转码信息
- map = containers.Map();
-
- % 定义你的转码数据
- sellers = {'seller_10', 'seller_11', 'seller_12', 'seller_13', ...};
- products = {'product_1914', 'product_1915', 'product_1916', 'product_1917', ...};
- warehouses = {'wh_1', 'wh_10', 'wh_11', 'wh_12', ...};
- % ... 添加其他数据
-
- % 填充map
- for i = 1:length(sellers)
- map(sellers{i}) = i;
- end
-
- for i = 1:length(products)
- map(products{i}) = i;
- end
-
- for i = 1:length(warehouses)
- map(warehouses{i}) = i;
- end
- % ... 同样地,为其他数据填充map
-
- % 对原始数据进行转码
- rows, cols = size(data);
- for r = 1:rows
- for c = 1:cols
- if isKey(map, data{r, c})
- data{r, c} = map(data{r, c});
- end
- end
- end
-
- % 输出转码后的数据
- disp(data);
然后 就可以进行后续分析了