POJ1007：DNA排序

一、Description

在这里插入图片描述

One measure of unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence DAABEC’‘, this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ZWQM’’ has 6 inversions (it is as unsorted as can be—exactly the reverse of sorted).

You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of sortedness'', from most sorted’’ to ``least sorted’'. All the strings are of the same length.

二、Input

The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.

三、Output

Output the list of input strings, arranged from most sorted'' to least sorted’'. Since two strings can be equally sorted, then output them according to the orginal order.

四、Sample Input

10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT

五、Sample Output

CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA

六、Source

#include 
#include 
#include 

#define DNA_LEN 50
#define DNA_NUM 100

#define BUFFER_SIZE 10000

typedef struct
{
    int unsortedness;
    char dnaString[DNA_LEN];
}Dna;

typedef struct
{
    int dnaLen;
    int dnaNum;
    Dna dna[DNA_NUM];
    Dna* pDna[DNA_NUM];
}DnaSequence;

DnaSequence DnaSeq;

void GetDnaSequence(DnaSequence *dnaSeq)
{
    int i;

    scanf("%d %d\n", &dnaSeq->dnaLen, &dnaSeq->dnaNum);

    for(i = 0; i < dnaSeq->dnaNum; i++)
    {
        if(NULL == gets(dnaSeq->dna[i].dnaString)) break;

        dnaSeq->pDna[i] = &dnaSeq->dna[i];
    }
}

void PrintDnaSequence(DnaSequence *dnaSeq)
{
    int i;

    for(i = 0; i < dnaSeq->dnaNum; i++)
    {
        printf("%s\n", dnaSeq->pDna[i]->dnaString);
    }
}
/*
void CalcUnsortedness(Dna* dna, int dnaLen)
{
    int delta,i,j;
    dna->unsortedness = 0;
    for(i = 0; i < dnaLen; i++)
    {
        for(j = i+1; j < dnaLen; j++)
        {
            delta = dna->dnaString[i] - dna->dnaString[j];
            if(delta > 0) dna->unsortedness++;
        }
    }
}
*/
void CalcUnsortedness(Dna* dna, int dnaLen)
{
    int i;
    int A = 0, C = 0, G = 0;
    dna->unsortedness = 0;
    for(i = dnaLen - 1; i >= 0; i--)
    {
        switch(dna->dnaString[i])
        {
            case 'A':
                A++;
                break;
            case 'C':
                C++;
                dna->unsortedness += A;
                break;
            case 'G':
                G++;
                dna->unsortedness += A+C;
                break;
            case 'T':
                dna->unsortedness += A+C+G;
                break;
            default:
                break;
        }
    }
}

int SortCmp(const void* elem1, const void* elem2)
{
    Dna* dna1 = (Dna *)(*(size_t*)elem1);
    Dna* dna2 = (Dna *)(*(size_t*)elem2);

    return dna1->unsortedness - dna2->unsortedness;
}

char g_mergeBuffer[BUFFER_SIZE];

void Merge(char* array, int elemSize, int left, int mid, int right, int (*SortCmp)(const void*, const void*))
{
    int i = left;
    int j = mid;
    int bufIdx = 0;

    while(i < mid && j <= right)
    {
        if(SortCmp(&array[i*elemSize], &array[j*elemSize]) <= 0)
        {
            memcpy(&g_mergeBuffer[bufIdx], &array[i*elemSize], elemSize);
            i++;
        }
        else
        {
            memcpy(&g_mergeBuffer[bufIdx], &array[j*elemSize], elemSize);
            j++;
        }
        bufIdx += elemSize;
    }

    for(; i < mid; i++)
    {
        memcpy(&g_mergeBuffer[bufIdx], &array[i*elemSize], elemSize);
        bufIdx += elemSize;
    }

    for(; j <= right; j++)
    {
        memcpy(&g_mergeBuffer[bufIdx], &array[j*elemSize], elemSize);
        bufIdx += elemSize;
    }

    memcpy(&array[left*elemSize], g_mergeBuffer, (right-left+1)*elemSize);
}

void MergeSort(void* array, int arrayLen, int elemSize, int (*SortCmp)(const void*, const void*))
{
    int loop, left, mid, right = 0;

    for(loop = 1; loop < arrayLen; loop *= 2)
    {
        left = 0;
        right = 0;
        while(right < arrayLen - 1)
        {
            mid = left + loop;
            right = (mid + loop - 1 > arrayLen - 1) ? (arrayLen - 1) : (mid + loop - 1);
            Merge((char*)array, elemSize, left, mid, right, SortCmp);
            left = left + loop * 2;
        }
    }
}

void ProcDnaSequence(DnaSequence *dnaSeq)
{
    int i;
    int elemSize = sizeof(dnaSeq->pDna[0]);

    for(i = 0; i < dnaSeq->dnaNum; i++)
    {
        CalcUnsortedness(&dnaSeq->dna[i], dnaSeq->dnaLen);
    }
    MergeSort(dnaSeq->pDna, dnaSeq->dnaNum, elemSize, SortCmp);
}

int main()
{
    GetDnaSequence(&DnaSeq);
    ProcDnaSequence(&DnaSeq);
    PrintDnaSequence(&DnaSeq);
    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

七、总结

逆序数可以用来描述一个序列混乱程度的量。例如，“DAABEC”的逆序数为5，其中D大于他右边的4个数，E大于他右边的1个数，4+1=5；又如，“ZWQM”的逆序数为3+2+1+0=6。现在有许多长度一样的字符串，每个字符串里面只会出现四种字母（A，T，G，C）。要求编写程序，将这些字符串按照他们的逆序数进行排序。
要求用稳定的排序算法，所以选择了归并排序。计算逆序数原本没想太多用的暴力遍历，但是后来看评论，发现一种有趣的算法。

相关阅读:
多线程系列(九) -ReentrantLock常用方法详解
 消息转化器(解决由于后端给前端响应的格式中不能处理Long类型，因为js只会处理前16位，这样会导致后面的精度丢失)
周志华《机器学习》习题4.4——python实现基于信息熵进行划分选择的决策树算法
 Spring
37 _ 贪心算法：如何用贪心算法实现Huffman压缩编码？
fusion 360制作机械臂
 【C++】类和对象（中）
shell编程基础
 【蓝桥杯国赛真题23】Scratch恐龙跑酷第十三届蓝桥杯图形化编程scratch国赛真题和答案讲解
 coco数据集json文件转换成YOLO版的txt文件代码
原文地址：https://blog.csdn.net/xiaxianba/article/details/128057008