qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

colo-compare.c (45532B)


      1 /*
      2  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
      3  * (a.k.a. Fault Tolerance or Continuous Replication)
      4  *
      5  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
      6  * Copyright (c) 2016 FUJITSU LIMITED
      7  * Copyright (c) 2016 Intel Corporation
      8  *
      9  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
     10  *
     11  * This work is licensed under the terms of the GNU GPL, version 2 or
     12  * later.  See the COPYING file in the top-level directory.
     13  */
     14 
     15 #include "qemu/osdep.h"
     16 #include "qemu/error-report.h"
     17 #include "trace.h"
     18 #include "qapi/error.h"
     19 #include "net/net.h"
     20 #include "net/eth.h"
     21 #include "qom/object_interfaces.h"
     22 #include "qemu/iov.h"
     23 #include "qom/object.h"
     24 #include "net/queue.h"
     25 #include "chardev/char-fe.h"
     26 #include "qemu/sockets.h"
     27 #include "colo.h"
     28 #include "sysemu/iothread.h"
     29 #include "net/colo-compare.h"
     30 #include "migration/colo.h"
     31 #include "migration/migration.h"
     32 #include "util.h"
     33 
     34 #include "block/aio-wait.h"
     35 #include "qemu/coroutine.h"
     36 
/* Type name under which the colo-compare object is declared. */
#define TYPE_COLO_COMPARE "colo-compare"
typedef struct CompareState CompareState;
/* Declares the COLO_COMPARE() checked cast used by the chardev handlers. */
DECLARE_INSTANCE_CHECKER(CompareState, COLO_COMPARE,
                         TYPE_COLO_COMPARE)

/* List of CompareState objects, linked through CompareState.next. */
static QTAILQ_HEAD(, CompareState) net_compares =
       QTAILQ_HEAD_INITIALIZER(net_compares);

/*
 * Notifiers fired by colo_compare_inconsistency_notify() when no notify_dev
 * is configured; managed with colo_compare_{register,unregister}_notifier().
 */
static NotifierList colo_compare_notifiers =
    NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);

/* Value reported by compare_chr_can_read(). */
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
/* NOTE(review): presumably the default for max_queue_size - confirm. */
#define MAX_QUEUE_SIZE 1024

/* Bit flags stored in the "mark" output of colo_mark_tcp_pkt(). */
#define COLO_COMPARE_FREE_PRIMARY     0x01
#define COLO_COMPARE_FREE_SECONDARY   0x02

/*
 * NOTE(review): presumably timer defaults in milliseconds (periodic old
 * packet scan and packet timeout); their users are outside this view.
 */
#define REGULAR_PACKET_CHECK_MS 1000
#define DEFAULT_TIME_OUT_MS 3000

/*
 * Uncomment to hexdump mismatching packets to stderr
 * (see the DEBUG_COLO_PACKETS sections below).
 */
/* #define DEBUG_COLO_PACKETS */

/*
 * Module-wide state. Only max_queue_size is used in the code visible here:
 * colo_insert_packet() stops queueing once a queue is longer than it.
 */
static QemuMutex colo_compare_mutex;
static bool colo_compare_active;
static QemuMutex event_mtx;
static QemuCond event_complete_cond;
static int event_unhandled_count;
static uint32_t max_queue_size;
     65 
     66 /*
     67  *  + CompareState ++
     68  *  |               |
     69  *  +---------------+   +---------------+         +---------------+
     70  *  |   conn list   + - >      conn     + ------- >      conn     + -- > ......
     71  *  +---------------+   +---------------+         +---------------+
     72  *  |               |     |           |             |          |
     73  *  +---------------+ +---v----+  +---v----+    +---v----+ +---v----+
     74  *                    |primary |  |secondary    |primary | |secondary
     75  *                    |packet  |  |packet  +    |packet  | |packet  +
     76  *                    +--------+  +--------+    +--------+ +--------+
     77  *                        |           |             |          |
     78  *                    +---v----+  +---v----+    +---v----+ +---v----+
     79  *                    |primary |  |secondary    |primary | |secondary
     80  *                    |packet  |  |packet  +    |packet  | |packet  +
     81  *                    +--------+  +--------+    +--------+ +--------+
     82  *                        |           |             |          |
     83  *                    +---v----+  +---v----+    +---v----+ +---v----+
     84  *                    |primary |  |secondary    |primary | |secondary
     85  *                    |packet  |  |packet  +    |packet  | |packet  +
     86  *                    +--------+  +--------+    +--------+ +--------+
     87  */
     88 
/*
 * State of one outgoing channel (CompareState has one for the output
 * chardev and one for the notify chardev).
 */
typedef struct SendCo {
    /* Coroutine running _compare_chr_send(); NULL once it has finished */
    Coroutine *co;
    /* Owning CompareState (read for its vnet_hdr setting) */
    struct CompareState *s;
    /* Chardev front end the queued data is written to */
    CharBackend *chr;
    /* Pending SendEntry elements */
    GQueue send_list;
    /* When true the vnet header length is never written for this channel */
    bool notify_remote_frame;
    /* True when no send coroutine is active; a new one may then be started */
    bool done;
    /* Result of the last finished coroutine run: 0 or a negative value */
    int ret;
} SendCo;
     98 
/* One queued frame waiting to be written by _compare_chr_send(). */
typedef struct SendEntry {
    /* Number of bytes in buf */
    uint32_t size;
    /* vnet header length transmitted ahead of the payload when enabled */
    uint32_t vnet_hdr_len;
    /* Payload; released with g_free() after the entry has been handled */
    uint8_t *buf;
} SendEntry;
    104 
/* Per-object state of a colo-compare instance. */
struct CompareState {
    Object parent;

    /* NOTE(review): presumably the chardev ids configured by the user */
    char *pri_indev;
    char *sec_indev;
    char *outdev;
    /* When non-NULL, inconsistencies are reported via notify_remote_frame() */
    char *notify_dev;
    CharBackend chr_pri_in;
    CharBackend chr_sec_in;
    CharBackend chr_out;
    CharBackend chr_notify_dev;
    /* Reassembly state fed by compare_pri_chr_in()/compare_sec_chr_in() */
    SocketReadState pri_rs;
    SocketReadState sec_rs;
    SocketReadState notify_rs;
    /* Send queues selected by compare_chr_send() (data vs. notification) */
    SendCo out_sendco;
    SendCo notify_sendco;
    /* When true the vnet header length is sent ahead of each data frame */
    bool vnet_hdr;
    /* Age threshold handed to colo_old_packet_check_one() (milliseconds) */
    uint64_t compare_timeout;
    uint32_t expired_scan_cycle;

    /*
     * Record the connections that go through the NIC
     * Element type: Connection
     */
    GQueue conn_list;
    /* Record the connection without repetition */
    GHashTable *connection_track_table;

    IOThread *iothread;
    GMainContext *worker_context;
    QEMUTimer *packet_check_timer;

    QEMUBH *event_bh;
    enum colo_event event;

    /* Link in the net_compares list */
    QTAILQ_ENTRY(CompareState) next;
};
    142 
/* Class structure of the colo-compare type; adds nothing to ObjectClass. */
typedef struct CompareClass {
    ObjectClass parent_class;
} CompareClass;
    146 
/* Identifies which input a packet arrived on (the "mode" of packet_enqueue) */
enum {
    PRIMARY_IN = 0,
    SECONDARY_IN,
};

/* Printable name of each input side, indexed by the enum above (tracing) */
static const char *colo_mode[] = {
    [PRIMARY_IN] = "primary",
    [SECONDARY_IN] = "secondary",
};
    156 
/* Forward declaration; the definition follows later in this file. */
static int compare_chr_send(CompareState *s,
                            uint8_t *buf,
                            uint32_t size,
                            uint32_t vnet_hdr_len,
                            bool notify_remote_frame,
                            bool zero_copy);
    163 
    164 static bool packet_matches_str(const char *str,
    165                                const uint8_t *buf,
    166                                uint32_t packet_len)
    167 {
    168     if (packet_len != strlen(str)) {
    169         return false;
    170     }
    171 
    172     return !memcmp(str, buf, packet_len);
    173 }
    174 
    175 static void notify_remote_frame(CompareState *s)
    176 {
    177     char msg[] = "DO_CHECKPOINT";
    178     int ret = 0;
    179 
    180     ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, false);
    181     if (ret < 0) {
    182         error_report("Notify Xen COLO-frame failed");
    183     }
    184 }
    185 
    186 static void colo_compare_inconsistency_notify(CompareState *s)
    187 {
    188     if (s->notify_dev) {
    189         notify_remote_frame(s);
    190     } else {
    191         notifier_list_notify(&colo_compare_notifiers,
    192                              migrate_get_current());
    193     }
    194 }
    195 
/*
 * Comparison callback for g_queue_insert_sorted(); use restricted to
 * colo_insert_packet().
 *
 * The result is b->tcp_seq - a->tcp_seq, i.e. negative when @a has the
 * larger sequence number. With GLib's sorted insert this places larger
 * sequence numbers towards the head, so the tail of a TCP queue holds the
 * packet with the lowest sequence number (colo_compare_tcp() pops the tail).
 * @data is unused.
 */
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
{
    return b->tcp_seq - a->tcp_seq;
}
    201 
/*
 * Fill the TCP related fields of a Packet from its TCP header and update
 * the caller's ACK tracker.
 *
 * @data:    the Packet; its transport_header must point at the TCP header
 * @max_ack: in/out ACK tracker of the queue the packet is inserted into
 */
static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
{
    Packet *pkt = data;
    struct tcp_hdr *tcphd;

    tcphd = (struct tcp_hdr *)pkt->transport_header;

    /* Sequence and acknowledgement numbers are kept in host byte order */
    pkt->tcp_seq = ntohl(tcphd->th_seq);
    pkt->tcp_ack = ntohl(tcphd->th_ack);
    /*
     * Need to consider ACK will bigger than uint32_t MAX
     *
     * NOTE(review): if tcp_ack is a uint32_t (its declaration is outside this
     * file) the subtraction is unsigned, "> 0" holds for any two different
     * values and *max_ack simply follows the most recent ACK - confirm that
     * this is intended before changing it.
     */
    *max_ack = pkt->tcp_ack - *max_ack > 0 ? pkt->tcp_ack : *max_ack;
    /* Bytes from the start of the packet data up to the TCP payload */
    pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
                       + (tcphd->th_off << 2);
    pkt->payload_size = pkt->size - pkt->header_size;
    /* Sequence number following the last payload byte */
    pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
    pkt->flags = tcphd->th_flags;
}
    219 
    220 /*
    221  * Return 1 on success, if return 0 means the
    222  * packet will be dropped
    223  */
    224 static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
    225 {
    226     if (g_queue_get_length(queue) <= max_queue_size) {
    227         if (pkt->ip->ip_p == IPPROTO_TCP) {
    228             fill_pkt_tcp_info(pkt, max_ack);
    229             g_queue_insert_sorted(queue,
    230                                   pkt,
    231                                   (GCompareDataFunc)seq_sorter,
    232                                   NULL);
    233         } else {
    234             g_queue_push_tail(queue, pkt);
    235         }
    236         return 1;
    237     }
    238     return 0;
    239 }
    240 
    241 /*
    242  * Return 0 on success, if return -1 means the pkt
    243  * is unsupported(arp and ipv6) and will be sent later
    244  */
    245 static int packet_enqueue(CompareState *s, int mode, Connection **con)
    246 {
    247     ConnectionKey key;
    248     Packet *pkt = NULL;
    249     Connection *conn;
    250     int ret;
    251 
    252     if (mode == PRIMARY_IN) {
    253         pkt = packet_new(s->pri_rs.buf,
    254                          s->pri_rs.packet_len,
    255                          s->pri_rs.vnet_hdr_len);
    256     } else {
    257         pkt = packet_new(s->sec_rs.buf,
    258                          s->sec_rs.packet_len,
    259                          s->sec_rs.vnet_hdr_len);
    260     }
    261 
    262     if (parse_packet_early(pkt)) {
    263         packet_destroy(pkt, NULL);
    264         pkt = NULL;
    265         return -1;
    266     }
    267     fill_connection_key(pkt, &key, false);
    268 
    269     conn = connection_get(s->connection_track_table,
    270                           &key,
    271                           &s->conn_list);
    272 
    273     if (!conn->processing) {
    274         g_queue_push_tail(&s->conn_list, conn);
    275         conn->processing = true;
    276     }
    277 
    278     if (mode == PRIMARY_IN) {
    279         ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack);
    280     } else {
    281         ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack);
    282     }
    283 
    284     if (!ret) {
    285         trace_colo_compare_drop_packet(colo_mode[mode],
    286             "queue size too big, drop packet");
    287         packet_destroy(pkt, NULL);
    288         pkt = NULL;
    289     }
    290 
    291     *con = conn;
    292 
    293     return 0;
    294 }
    295 
    296 static inline bool after(uint32_t seq1, uint32_t seq2)
    297 {
    298         return (int32_t)(seq1 - seq2) > 0;
    299 }
    300 
    301 static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
    302 {
    303     int ret;
    304     ret = compare_chr_send(s,
    305                            pkt->data,
    306                            pkt->size,
    307                            pkt->vnet_hdr_len,
    308                            false,
    309                            true);
    310     if (ret < 0) {
    311         error_report("colo send primary packet failed");
    312     }
    313     trace_colo_compare_main("packet same and release packet");
    314     packet_destroy_partial(pkt, NULL);
    315 }
    316 
/*
 * The IP packets sent by primary and secondary
 * will be compared in here
 * TODO support ip fragment, Out-Of-Order
 *
 * Compares len bytes starting poffset bytes into the primary packet data
 * with len bytes starting soffset bytes into the secondary packet data.
 * When the colo_compare_ip_info trace event is enabled the addresses and
 * sizes of both packets are traced first.
 *
 * return:    0  means packet same
 *            > 0 || < 0 means packet different
 */
static int colo_compare_packet_payload(Packet *ppkt,
                                       Packet *spkt,
                                       uint16_t poffset,
                                       uint16_t soffset,
                                       uint16_t len)

{
    if (trace_event_get_state_backends(TRACE_COLO_COMPARE_IP_INFO)) {
        char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];

        /*
         * inet_ntoa() returns a statically allocated buffer that the next
         * call overwrites, hence one copy per address.
         */
        strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
        strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
        strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
        strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));

        trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
                                   pri_ip_dst, spkt->size,
                                   sec_ip_src, sec_ip_dst);
    }

    return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
}
    346 
/*
 * Compare the not yet compared payload of a primary and a secondary TCP
 * packet and tell the caller which of the two is finished.
 *
 * @mark is always written: 0 when nothing can be released, otherwise a
 * combination of COLO_COMPARE_FREE_PRIMARY and COLO_COMPARE_FREE_SECONDARY.
 * @max_ack is the highest ACK a primary packet may carry to be released.
 *
 * return true means that the payload is consistent and
 * the next comparison can be made, false means do
 * the checkpoint
 */
static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
                              int8_t *mark, uint32_t max_ack)
{
    *mark = 0;

    /* Same sequence range on both sides: compare the whole payload */
    if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
        if (!colo_compare_packet_payload(ppkt, spkt,
                                        ppkt->header_size, spkt->header_size,
                                        ppkt->payload_size)) {
            *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
            return true;
        }
    }

    /* one part of secondary packet payload still need to be compared */
    if (!after(ppkt->seq_end, spkt->seq_end)) {
        /* Primary ends first (or together): compare the rest of the primary */
        if (!colo_compare_packet_payload(ppkt, spkt,
                                        ppkt->header_size + ppkt->offset,
                                        spkt->header_size + spkt->offset,
                                        ppkt->payload_size - ppkt->offset)) {
            if (!after(ppkt->tcp_ack, max_ack)) {
                *mark = COLO_COMPARE_FREE_PRIMARY;
                /* Remember how much of the secondary has been compared */
                spkt->offset += ppkt->payload_size - ppkt->offset;
                return true;
            } else {
                /* secondary guest hasn't ack the data, don't send
                 * out this packet
                 */
                return false;
            }
        }
    } else {
        /* primary packet is longer than secondary packet, compare
         * the same part and mark the primary packet offset
         */
        if (!colo_compare_packet_payload(ppkt, spkt,
                                        ppkt->header_size + ppkt->offset,
                                        spkt->header_size + spkt->offset,
                                        spkt->payload_size - spkt->offset)) {
            *mark = COLO_COMPARE_FREE_SECONDARY;
            ppkt->offset += spkt->payload_size - spkt->offset;
            return true;
        }
    }

    /* Payload differs */
    return false;
}
    399 
    400 static void colo_compare_tcp(CompareState *s, Connection *conn)
    401 {
    402     Packet *ppkt = NULL, *spkt = NULL;
    403     int8_t mark;
    404 
    405     /*
    406      * If ppkt and spkt have the same payload, but ppkt's ACK
    407      * is greater than spkt's ACK, in this case we can not
    408      * send the ppkt because it will cause the secondary guest
    409      * to miss sending some data in the next. Therefore, we
    410      * record the maximum ACK in the current queue at both
    411      * primary side and secondary side. Only when the ack is
    412      * less than the smaller of the two maximum ack, then we
    413      * can ensure that the packet's payload is acknowledged by
    414      * primary and secondary.
    415     */
    416     uint32_t min_ack = conn->pack - conn->sack > 0 ?
    417                        conn->sack : conn->pack;
    418 
    419 pri:
    420     if (g_queue_is_empty(&conn->primary_list)) {
    421         return;
    422     }
    423     ppkt = g_queue_pop_tail(&conn->primary_list);
    424 sec:
    425     if (g_queue_is_empty(&conn->secondary_list)) {
    426         g_queue_push_tail(&conn->primary_list, ppkt);
    427         return;
    428     }
    429     spkt = g_queue_pop_tail(&conn->secondary_list);
    430 
    431     if (ppkt->tcp_seq == ppkt->seq_end) {
    432         colo_release_primary_pkt(s, ppkt);
    433         ppkt = NULL;
    434     }
    435 
    436     if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
    437         trace_colo_compare_main("pri: this packet has compared");
    438         colo_release_primary_pkt(s, ppkt);
    439         ppkt = NULL;
    440     }
    441 
    442     if (spkt->tcp_seq == spkt->seq_end) {
    443         packet_destroy(spkt, NULL);
    444         if (!ppkt) {
    445             goto pri;
    446         } else {
    447             goto sec;
    448         }
    449     } else {
    450         if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
    451             trace_colo_compare_main("sec: this packet has compared");
    452             packet_destroy(spkt, NULL);
    453             if (!ppkt) {
    454                 goto pri;
    455             } else {
    456                 goto sec;
    457             }
    458         }
    459         if (!ppkt) {
    460             g_queue_push_tail(&conn->secondary_list, spkt);
    461             goto pri;
    462         }
    463     }
    464 
    465     if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
    466         trace_colo_compare_tcp_info("pri",
    467                                     ppkt->tcp_seq, ppkt->tcp_ack,
    468                                     ppkt->header_size, ppkt->payload_size,
    469                                     ppkt->offset, ppkt->flags);
    470 
    471         trace_colo_compare_tcp_info("sec",
    472                                     spkt->tcp_seq, spkt->tcp_ack,
    473                                     spkt->header_size, spkt->payload_size,
    474                                     spkt->offset, spkt->flags);
    475 
    476         if (mark == COLO_COMPARE_FREE_PRIMARY) {
    477             conn->compare_seq = ppkt->seq_end;
    478             colo_release_primary_pkt(s, ppkt);
    479             g_queue_push_tail(&conn->secondary_list, spkt);
    480             goto pri;
    481         } else if (mark == COLO_COMPARE_FREE_SECONDARY) {
    482             conn->compare_seq = spkt->seq_end;
    483             packet_destroy(spkt, NULL);
    484             goto sec;
    485         } else if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
    486             conn->compare_seq = ppkt->seq_end;
    487             colo_release_primary_pkt(s, ppkt);
    488             packet_destroy(spkt, NULL);
    489             goto pri;
    490         }
    491     } else {
    492         g_queue_push_tail(&conn->primary_list, ppkt);
    493         g_queue_push_tail(&conn->secondary_list, spkt);
    494 
    495 #ifdef DEBUG_COLO_PACKETS
    496         qemu_hexdump(stderr, "colo-compare ppkt", ppkt->data, ppkt->size);
    497         qemu_hexdump(stderr, "colo-compare spkt", spkt->data, spkt->size);
    498 #endif
    499 
    500         colo_compare_inconsistency_notify(s);
    501     }
    502 }
    503 
    504 
    505 /*
    506  * Called from the compare thread on the primary
    507  * for compare udp packet
    508  */
    509 static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
    510 {
    511     uint16_t network_header_length = ppkt->ip->ip_hl << 2;
    512     uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
    513 
    514     trace_colo_compare_main("compare udp");
    515 
    516     /*
    517      * Because of ppkt and spkt are both in the same connection,
    518      * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are
    519      * same with spkt. In addition, IP header's Identification is a random
    520      * field, we can handle it in IP fragmentation function later.
    521      * COLO just concern the response net packet payload from primary guest
    522      * and secondary guest are same or not, So we ignored all IP header include
    523      * other field like TOS,TTL,IP Checksum. we only need to compare
    524      * the ip payload here.
    525      */
    526     if (ppkt->size != spkt->size) {
    527         trace_colo_compare_main("UDP: payload size of packets are different");
    528         return -1;
    529     }
    530     if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
    531                                     ppkt->size - offset)) {
    532         trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
    533         trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
    534 #ifdef DEBUG_COLO_PACKETS
    535         qemu_hexdump(stderr, "colo-compare pri pkt", ppkt->data, ppkt->size);
    536         qemu_hexdump(stderr, "colo-compare sec pkt", spkt->data, spkt->size);
    537 #endif
    538         return -1;
    539     } else {
    540         return 0;
    541     }
    542 }
    543 
    544 /*
    545  * Called from the compare thread on the primary
    546  * for compare icmp packet
    547  */
    548 static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
    549 {
    550     uint16_t network_header_length = ppkt->ip->ip_hl << 2;
    551     uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
    552 
    553     trace_colo_compare_main("compare icmp");
    554 
    555     /*
    556      * Because of ppkt and spkt are both in the same connection,
    557      * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are
    558      * same with spkt. In addition, IP header's Identification is a random
    559      * field, we can handle it in IP fragmentation function later.
    560      * COLO just concern the response net packet payload from primary guest
    561      * and secondary guest are same or not, So we ignored all IP header include
    562      * other field like TOS,TTL,IP Checksum. we only need to compare
    563      * the ip payload here.
    564      */
    565     if (ppkt->size != spkt->size) {
    566         trace_colo_compare_main("ICMP: payload size of packets are different");
    567         return -1;
    568     }
    569     if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
    570                                     ppkt->size - offset)) {
    571         trace_colo_compare_icmp_miscompare("primary pkt size",
    572                                            ppkt->size);
    573         trace_colo_compare_icmp_miscompare("Secondary pkt size",
    574                                            spkt->size);
    575 #ifdef DEBUG_COLO_PACKETS
    576         qemu_hexdump(stderr, "colo-compare pri pkt", ppkt->data, ppkt->size);
    577         qemu_hexdump(stderr, "colo-compare sec pkt", spkt->data, spkt->size);
    578 #endif
    579         return -1;
    580     } else {
    581         return 0;
    582     }
    583 }
    584 
    585 /*
    586  * Called from the compare thread on the primary
    587  * for compare other packet
    588  */
    589 static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
    590 {
    591     uint16_t offset = ppkt->vnet_hdr_len;
    592 
    593     trace_colo_compare_main("compare other");
    594     if (ppkt->size != spkt->size) {
    595         trace_colo_compare_main("Other: payload size of packets are different");
    596         return -1;
    597     }
    598     return colo_compare_packet_payload(ppkt, spkt, offset, offset,
    599                                        ppkt->size - offset);
    600 }
    601 
    602 static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
    603 {
    604     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    605 
    606     if ((now - pkt->creation_ms) > (*check_time)) {
    607         trace_colo_old_packet_check_found(pkt->creation_ms);
    608         return 0;
    609     } else {
    610         return 1;
    611     }
    612 }
    613 
/*
 * Add notify to the list of notifiers fired by
 * colo_compare_inconsistency_notify() when no notify_dev is configured.
 */
void colo_compare_register_notifier(Notifier *notify)
{
    notifier_list_add(&colo_compare_notifiers, notify);
}
    618 
/* Remove a notifier previously added with colo_compare_register_notifier(). */
void colo_compare_unregister_notifier(Notifier *notify)
{
    notifier_remove(notify);
}
    623 
    624 static int colo_old_packet_check_one_conn(Connection *conn,
    625                                           CompareState *s)
    626 {
    627     if (!g_queue_is_empty(&conn->primary_list)) {
    628         if (g_queue_find_custom(&conn->primary_list,
    629                                 &s->compare_timeout,
    630                                 (GCompareFunc)colo_old_packet_check_one))
    631             goto out;
    632     }
    633 
    634     if (!g_queue_is_empty(&conn->secondary_list)) {
    635         if (g_queue_find_custom(&conn->secondary_list,
    636                                 &s->compare_timeout,
    637                                 (GCompareFunc)colo_old_packet_check_one))
    638             goto out;
    639     }
    640 
    641     return 1;
    642 
    643 out:
    644     /* Do checkpoint will flush old packet */
    645     colo_compare_inconsistency_notify(s);
    646     return 0;
    647 }
    648 
/*
 * Look for old packets that the secondary hasn't matched,
 * if we have some then we have to checkpoint to wake
 * the secondary up.
 *
 * @opaque is the CompareState whose connection list is scanned.
 */
static void colo_old_packet_check(void *opaque)
{
    CompareState *s = opaque;

    /*
     * If we find one old packet, stop finding job and notify
     * COLO frame do checkpoint.
     * The search ends at the first connection for which the callback
     * returns 0; the notification itself is issued by the callback.
     */
    g_queue_find_custom(&s->conn_list, s,
                        (GCompareFunc)colo_old_packet_check_one_conn);
}
    665 
    666 static void colo_compare_packet(CompareState *s, Connection *conn,
    667                                 int (*HandlePacket)(Packet *spkt,
    668                                 Packet *ppkt))
    669 {
    670     Packet *pkt = NULL;
    671     GList *result = NULL;
    672 
    673     while (!g_queue_is_empty(&conn->primary_list) &&
    674            !g_queue_is_empty(&conn->secondary_list)) {
    675         pkt = g_queue_pop_tail(&conn->primary_list);
    676         result = g_queue_find_custom(&conn->secondary_list,
    677                  pkt, (GCompareFunc)HandlePacket);
    678 
    679         if (result) {
    680             colo_release_primary_pkt(s, pkt);
    681             packet_destroy(result->data, NULL);
    682             g_queue_delete_link(&conn->secondary_list, result);
    683         } else {
    684             /*
    685              * If one packet arrive late, the secondary_list or
    686              * primary_list will be empty, so we can't compare it
    687              * until next comparison. If the packets in the list are
    688              * timeout, it will trigger a checkpoint request.
    689              */
    690             trace_colo_compare_main("packet different");
    691             g_queue_push_tail(&conn->primary_list, pkt);
    692 
    693             colo_compare_inconsistency_notify(s);
    694             break;
    695         }
    696     }
    697 }
    698 
    699 /*
    700  * Called from the compare thread on the primary
    701  * for compare packet with secondary list of the
    702  * specified connection when a new packet was
    703  * queued to it.
    704  */
    705 static void colo_compare_connection(void *opaque, void *user_data)
    706 {
    707     CompareState *s = user_data;
    708     Connection *conn = opaque;
    709 
    710     switch (conn->ip_proto) {
    711     case IPPROTO_TCP:
    712         colo_compare_tcp(s, conn);
    713         break;
    714     case IPPROTO_UDP:
    715         colo_compare_packet(s, conn, colo_packet_compare_udp);
    716         break;
    717     case IPPROTO_ICMP:
    718         colo_compare_packet(s, conn, colo_packet_compare_icmp);
    719         break;
    720     default:
    721         colo_compare_packet(s, conn, colo_packet_compare_other);
    722         break;
    723     }
    724 }
    725 
    726 static void coroutine_fn _compare_chr_send(void *opaque)
    727 {
    728     SendCo *sendco = opaque;
    729     CompareState *s = sendco->s;
    730     int ret = 0;
    731 
    732     while (!g_queue_is_empty(&sendco->send_list)) {
    733         SendEntry *entry = g_queue_pop_tail(&sendco->send_list);
    734         uint32_t len = htonl(entry->size);
    735 
    736         ret = qemu_chr_fe_write_all(sendco->chr, (uint8_t *)&len, sizeof(len));
    737 
    738         if (ret != sizeof(len)) {
    739             g_free(entry->buf);
    740             g_slice_free(SendEntry, entry);
    741             goto err;
    742         }
    743 
    744         if (!sendco->notify_remote_frame && s->vnet_hdr) {
    745             /*
    746              * We send vnet header len make other module(like filter-redirector)
    747              * know how to parse net packet correctly.
    748              */
    749             len = htonl(entry->vnet_hdr_len);
    750 
    751             ret = qemu_chr_fe_write_all(sendco->chr,
    752                                         (uint8_t *)&len,
    753                                         sizeof(len));
    754 
    755             if (ret != sizeof(len)) {
    756                 g_free(entry->buf);
    757                 g_slice_free(SendEntry, entry);
    758                 goto err;
    759             }
    760         }
    761 
    762         ret = qemu_chr_fe_write_all(sendco->chr,
    763                                     (uint8_t *)entry->buf,
    764                                     entry->size);
    765 
    766         if (ret != entry->size) {
    767             g_free(entry->buf);
    768             g_slice_free(SendEntry, entry);
    769             goto err;
    770         }
    771 
    772         g_free(entry->buf);
    773         g_slice_free(SendEntry, entry);
    774     }
    775 
    776     sendco->ret = 0;
    777     goto out;
    778 
    779 err:
    780     while (!g_queue_is_empty(&sendco->send_list)) {
    781         SendEntry *entry = g_queue_pop_tail(&sendco->send_list);
    782         g_free(entry->buf);
    783         g_slice_free(SendEntry, entry);
    784     }
    785     sendco->ret = ret < 0 ? ret : -EIO;
    786 out:
    787     sendco->co = NULL;
    788     sendco->done = true;
    789     aio_wait_kick();
    790 }
    791 
    792 static int compare_chr_send(CompareState *s,
    793                             uint8_t *buf,
    794                             uint32_t size,
    795                             uint32_t vnet_hdr_len,
    796                             bool notify_remote_frame,
    797                             bool zero_copy)
    798 {
    799     SendCo *sendco;
    800     SendEntry *entry;
    801 
    802     if (notify_remote_frame) {
    803         sendco = &s->notify_sendco;
    804     } else {
    805         sendco = &s->out_sendco;
    806     }
    807 
    808     if (!size) {
    809         return -1;
    810     }
    811 
    812     entry = g_slice_new(SendEntry);
    813     entry->size = size;
    814     entry->vnet_hdr_len = vnet_hdr_len;
    815     if (zero_copy) {
    816         entry->buf = buf;
    817     } else {
    818         entry->buf = g_malloc(size);
    819         memcpy(entry->buf, buf, size);
    820     }
    821     g_queue_push_tail(&sendco->send_list, entry);
    822 
    823     if (sendco->done) {
    824         sendco->co = qemu_coroutine_create(_compare_chr_send, sendco);
    825         sendco->done = false;
    826         qemu_coroutine_enter(sendco->co);
    827         if (sendco->done) {
    828             /* report early errors */
    829             return sendco->ret;
    830         }
    831     }
    832 
    833     /* assume success */
    834     return 0;
    835 }
    836 
    837 static int compare_chr_can_read(void *opaque)
    838 {
    839     return COMPARE_READ_LEN_MAX;
    840 }
    841 
    842 /*
    843  * Called from the main thread on the primary for packets
    844  * arriving over the socket from the primary.
    845  */
    846 static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
    847 {
    848     CompareState *s = COLO_COMPARE(opaque);
    849     int ret;
    850 
    851     ret = net_fill_rstate(&s->pri_rs, buf, size);
    852     if (ret == -1) {
    853         qemu_chr_fe_set_handlers(&s->chr_pri_in, NULL, NULL, NULL, NULL,
    854                                  NULL, NULL, true);
    855         error_report("colo-compare primary_in error");
    856     }
    857 }
    858 
    859 /*
    860  * Called from the main thread on the primary for packets
    861  * arriving over the socket from the secondary.
    862  */
    863 static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
    864 {
    865     CompareState *s = COLO_COMPARE(opaque);
    866     int ret;
    867 
    868     ret = net_fill_rstate(&s->sec_rs, buf, size);
    869     if (ret == -1) {
    870         qemu_chr_fe_set_handlers(&s->chr_sec_in, NULL, NULL, NULL, NULL,
    871                                  NULL, NULL, true);
    872         error_report("colo-compare secondary_in error");
    873     }
    874 }
    875 
    876 static void compare_notify_chr(void *opaque, const uint8_t *buf, int size)
    877 {
    878     CompareState *s = COLO_COMPARE(opaque);
    879     int ret;
    880 
    881     ret = net_fill_rstate(&s->notify_rs, buf, size);
    882     if (ret == -1) {
    883         qemu_chr_fe_set_handlers(&s->chr_notify_dev, NULL, NULL, NULL, NULL,
    884                                  NULL, NULL, true);
    885         error_report("colo-compare notify_dev error");
    886     }
    887 }
    888 
    889 /*
    890  * Check old packet regularly so it can watch for any packets
    891  * that the secondary hasn't produced equivalents of.
    892  */
    893 static void check_old_packet_regular(void *opaque)
    894 {
    895     CompareState *s = opaque;
    896 
    897     /* if have old packet we will notify checkpoint */
    898     colo_old_packet_check(s);
    899     timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
    900               s->expired_scan_cycle);
    901 }
    902 
    903 /* Public API, Used for COLO frame to notify compare event */
    904 void colo_notify_compares_event(void *opaque, int event, Error **errp)
    905 {
    906     CompareState *s;
    907     qemu_mutex_lock(&colo_compare_mutex);
    908 
    909     if (!colo_compare_active) {
    910         qemu_mutex_unlock(&colo_compare_mutex);
    911         return;
    912     }
    913 
    914     qemu_mutex_lock(&event_mtx);
    915     QTAILQ_FOREACH(s, &net_compares, next) {
    916         s->event = event;
    917         qemu_bh_schedule(s->event_bh);
    918         event_unhandled_count++;
    919     }
    920     /* Wait all compare threads to finish handling this event */
    921     while (event_unhandled_count > 0) {
    922         qemu_cond_wait(&event_complete_cond, &event_mtx);
    923     }
    924 
    925     qemu_mutex_unlock(&event_mtx);
    926     qemu_mutex_unlock(&colo_compare_mutex);
    927 }
    928 
    929 static void colo_compare_timer_init(CompareState *s)
    930 {
    931     AioContext *ctx = iothread_get_aio_context(s->iothread);
    932 
    933     s->packet_check_timer = aio_timer_new(ctx, QEMU_CLOCK_HOST,
    934                                 SCALE_MS, check_old_packet_regular,
    935                                 s);
    936     timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
    937               s->expired_scan_cycle);
    938 }
    939 
    940 static void colo_compare_timer_del(CompareState *s)
    941 {
    942     if (s->packet_check_timer) {
    943         timer_free(s->packet_check_timer);
    944         s->packet_check_timer = NULL;
    945     }
    946  }
    947 
    948 static void colo_flush_packets(void *opaque, void *user_data);
    949 
    950 static void colo_compare_handle_event(void *opaque)
    951 {
    952     CompareState *s = opaque;
    953 
    954     switch (s->event) {
    955     case COLO_EVENT_CHECKPOINT:
    956         g_queue_foreach(&s->conn_list, colo_flush_packets, s);
    957         break;
    958     case COLO_EVENT_FAILOVER:
    959         break;
    960     default:
    961         break;
    962     }
    963 
    964     qemu_mutex_lock(&event_mtx);
    965     assert(event_unhandled_count > 0);
    966     event_unhandled_count--;
    967     qemu_cond_broadcast(&event_complete_cond);
    968     qemu_mutex_unlock(&event_mtx);
    969 }
    970 
    971 static void colo_compare_iothread(CompareState *s)
    972 {
    973     AioContext *ctx = iothread_get_aio_context(s->iothread);
    974     object_ref(OBJECT(s->iothread));
    975     s->worker_context = iothread_get_g_main_context(s->iothread);
    976 
    977     qemu_chr_fe_set_handlers(&s->chr_pri_in, compare_chr_can_read,
    978                              compare_pri_chr_in, NULL, NULL,
    979                              s, s->worker_context, true);
    980     qemu_chr_fe_set_handlers(&s->chr_sec_in, compare_chr_can_read,
    981                              compare_sec_chr_in, NULL, NULL,
    982                              s, s->worker_context, true);
    983     if (s->notify_dev) {
    984         qemu_chr_fe_set_handlers(&s->chr_notify_dev, compare_chr_can_read,
    985                                  compare_notify_chr, NULL, NULL,
    986                                  s, s->worker_context, true);
    987     }
    988 
    989     colo_compare_timer_init(s);
    990     s->event_bh = aio_bh_new(ctx, colo_compare_handle_event, s);
    991 }
    992 
    993 static char *compare_get_pri_indev(Object *obj, Error **errp)
    994 {
    995     CompareState *s = COLO_COMPARE(obj);
    996 
    997     return g_strdup(s->pri_indev);
    998 }
    999 
   1000 static void compare_set_pri_indev(Object *obj, const char *value, Error **errp)
   1001 {
   1002     CompareState *s = COLO_COMPARE(obj);
   1003 
   1004     g_free(s->pri_indev);
   1005     s->pri_indev = g_strdup(value);
   1006 }
   1007 
   1008 static char *compare_get_sec_indev(Object *obj, Error **errp)
   1009 {
   1010     CompareState *s = COLO_COMPARE(obj);
   1011 
   1012     return g_strdup(s->sec_indev);
   1013 }
   1014 
   1015 static void compare_set_sec_indev(Object *obj, const char *value, Error **errp)
   1016 {
   1017     CompareState *s = COLO_COMPARE(obj);
   1018 
   1019     g_free(s->sec_indev);
   1020     s->sec_indev = g_strdup(value);
   1021 }
   1022 
   1023 static char *compare_get_outdev(Object *obj, Error **errp)
   1024 {
   1025     CompareState *s = COLO_COMPARE(obj);
   1026 
   1027     return g_strdup(s->outdev);
   1028 }
   1029 
   1030 static void compare_set_outdev(Object *obj, const char *value, Error **errp)
   1031 {
   1032     CompareState *s = COLO_COMPARE(obj);
   1033 
   1034     g_free(s->outdev);
   1035     s->outdev = g_strdup(value);
   1036 }
   1037 
   1038 static bool compare_get_vnet_hdr(Object *obj, Error **errp)
   1039 {
   1040     CompareState *s = COLO_COMPARE(obj);
   1041 
   1042     return s->vnet_hdr;
   1043 }
   1044 
   1045 static void compare_set_vnet_hdr(Object *obj,
   1046                                  bool value,
   1047                                  Error **errp)
   1048 {
   1049     CompareState *s = COLO_COMPARE(obj);
   1050 
   1051     s->vnet_hdr = value;
   1052 }
   1053 
   1054 static char *compare_get_notify_dev(Object *obj, Error **errp)
   1055 {
   1056     CompareState *s = COLO_COMPARE(obj);
   1057 
   1058     return g_strdup(s->notify_dev);
   1059 }
   1060 
   1061 static void compare_set_notify_dev(Object *obj, const char *value, Error **errp)
   1062 {
   1063     CompareState *s = COLO_COMPARE(obj);
   1064 
   1065     g_free(s->notify_dev);
   1066     s->notify_dev = g_strdup(value);
   1067 }
   1068 
   1069 static void compare_get_timeout(Object *obj, Visitor *v,
   1070                                 const char *name, void *opaque,
   1071                                 Error **errp)
   1072 {
   1073     CompareState *s = COLO_COMPARE(obj);
   1074     uint64_t value = s->compare_timeout;
   1075 
   1076     visit_type_uint64(v, name, &value, errp);
   1077 }
   1078 
   1079 static void compare_set_timeout(Object *obj, Visitor *v,
   1080                                 const char *name, void *opaque,
   1081                                 Error **errp)
   1082 {
   1083     CompareState *s = COLO_COMPARE(obj);
   1084     uint32_t value;
   1085 
   1086     if (!visit_type_uint32(v, name, &value, errp)) {
   1087         return;
   1088     }
   1089     if (!value) {
   1090         error_setg(errp, "Property '%s.%s' requires a positive value",
   1091                    object_get_typename(obj), name);
   1092         return;
   1093     }
   1094     s->compare_timeout = value;
   1095 }
   1096 
   1097 static void compare_get_expired_scan_cycle(Object *obj, Visitor *v,
   1098                                            const char *name, void *opaque,
   1099                                            Error **errp)
   1100 {
   1101     CompareState *s = COLO_COMPARE(obj);
   1102     uint32_t value = s->expired_scan_cycle;
   1103 
   1104     visit_type_uint32(v, name, &value, errp);
   1105 }
   1106 
   1107 static void compare_set_expired_scan_cycle(Object *obj, Visitor *v,
   1108                                            const char *name, void *opaque,
   1109                                            Error **errp)
   1110 {
   1111     CompareState *s = COLO_COMPARE(obj);
   1112     uint32_t value;
   1113 
   1114     if (!visit_type_uint32(v, name, &value, errp)) {
   1115         return;
   1116     }
   1117     if (!value) {
   1118         error_setg(errp, "Property '%s.%s' requires a positive value",
   1119                    object_get_typename(obj), name);
   1120         return;
   1121     }
   1122     s->expired_scan_cycle = value;
   1123 }
   1124 
   1125 static void get_max_queue_size(Object *obj, Visitor *v,
   1126                                const char *name, void *opaque,
   1127                                Error **errp)
   1128 {
   1129     uint32_t value = max_queue_size;
   1130 
   1131     visit_type_uint32(v, name, &value, errp);
   1132 }
   1133 
   1134 static void set_max_queue_size(Object *obj, Visitor *v,
   1135                                const char *name, void *opaque,
   1136                                Error **errp)
   1137 {
   1138     Error *local_err = NULL;
   1139     uint64_t value;
   1140 
   1141     visit_type_uint64(v, name, &value, &local_err);
   1142     if (local_err) {
   1143         goto out;
   1144     }
   1145     if (!value) {
   1146         error_setg(&local_err, "Property '%s.%s' requires a positive value",
   1147                    object_get_typename(obj), name);
   1148         goto out;
   1149     }
   1150     max_queue_size = value;
   1151 
   1152 out:
   1153     error_propagate(errp, local_err);
   1154 }
   1155 
   1156 static void compare_pri_rs_finalize(SocketReadState *pri_rs)
   1157 {
   1158     CompareState *s = container_of(pri_rs, CompareState, pri_rs);
   1159     Connection *conn = NULL;
   1160 
   1161     if (packet_enqueue(s, PRIMARY_IN, &conn)) {
   1162         trace_colo_compare_main("primary: unsupported packet in");
   1163         compare_chr_send(s,
   1164                          pri_rs->buf,
   1165                          pri_rs->packet_len,
   1166                          pri_rs->vnet_hdr_len,
   1167                          false,
   1168                          false);
   1169     } else {
   1170         /* compare packet in the specified connection */
   1171         colo_compare_connection(conn, s);
   1172     }
   1173 }
   1174 
   1175 static void compare_sec_rs_finalize(SocketReadState *sec_rs)
   1176 {
   1177     CompareState *s = container_of(sec_rs, CompareState, sec_rs);
   1178     Connection *conn = NULL;
   1179 
   1180     if (packet_enqueue(s, SECONDARY_IN, &conn)) {
   1181         trace_colo_compare_main("secondary: unsupported packet in");
   1182     } else {
   1183         /* compare packet in the specified connection */
   1184         colo_compare_connection(conn, s);
   1185     }
   1186 }
   1187 
   1188 static void compare_notify_rs_finalize(SocketReadState *notify_rs)
   1189 {
   1190     CompareState *s = container_of(notify_rs, CompareState, notify_rs);
   1191 
   1192     const char msg[] = "COLO_COMPARE_GET_XEN_INIT";
   1193     int ret;
   1194 
   1195     if (packet_matches_str("COLO_USERSPACE_PROXY_INIT",
   1196                            notify_rs->buf,
   1197                            notify_rs->packet_len)) {
   1198         ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, false);
   1199         if (ret < 0) {
   1200             error_report("Notify Xen COLO-frame INIT failed");
   1201         }
   1202     } else if (packet_matches_str("COLO_CHECKPOINT",
   1203                                   notify_rs->buf,
   1204                                   notify_rs->packet_len)) {
   1205         /* colo-compare do checkpoint, flush pri packet and remove sec packet */
   1206         g_queue_foreach(&s->conn_list, colo_flush_packets, s);
   1207     } else {
   1208         error_report("COLO compare got unsupported instruction");
   1209     }
   1210 }
   1211 
   1212 /*
   1213  * Return 0 is success.
   1214  * Return 1 is failed.
   1215  */
   1216 static int find_and_check_chardev(Chardev **chr,
   1217                                   char *chr_name,
   1218                                   Error **errp)
   1219 {
   1220     *chr = qemu_chr_find(chr_name);
   1221     if (*chr == NULL) {
   1222         error_setg(errp, "Device '%s' not found",
   1223                    chr_name);
   1224         return 1;
   1225     }
   1226 
   1227     if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
   1228         error_setg(errp, "chardev \"%s\" is not reconnectable",
   1229                    chr_name);
   1230         return 1;
   1231     }
   1232 
   1233     if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_GCONTEXT)) {
   1234         error_setg(errp, "chardev \"%s\" cannot switch context",
   1235                    chr_name);
   1236         return 1;
   1237     }
   1238 
   1239     return 0;
   1240 }
   1241 
   1242 /*
   1243  * Called from the main thread on the primary
   1244  * to setup colo-compare.
   1245  */
   1246 static void colo_compare_complete(UserCreatable *uc, Error **errp)
   1247 {
   1248     CompareState *s = COLO_COMPARE(uc);
   1249     Chardev *chr;
   1250 
   1251     if (!s->pri_indev || !s->sec_indev || !s->outdev || !s->iothread) {
   1252         error_setg(errp, "colo compare needs 'primary_in' ,"
   1253                    "'secondary_in','outdev','iothread' property set");
   1254         return;
   1255     } else if (!strcmp(s->pri_indev, s->outdev) ||
   1256                !strcmp(s->sec_indev, s->outdev) ||
   1257                !strcmp(s->pri_indev, s->sec_indev)) {
   1258         error_setg(errp, "'indev' and 'outdev' could not be same "
   1259                    "for compare module");
   1260         return;
   1261     }
   1262 
   1263     if (!s->compare_timeout) {
   1264         /* Set default value to 3000 MS */
   1265         s->compare_timeout = DEFAULT_TIME_OUT_MS;
   1266     }
   1267 
   1268     if (!s->expired_scan_cycle) {
   1269         /* Set default value to 1000 MS */
   1270         s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS;
   1271     }
   1272 
   1273     if (!max_queue_size) {
   1274         /* Set default queue size to 1024 */
   1275         max_queue_size = MAX_QUEUE_SIZE;
   1276     }
   1277 
   1278     if (find_and_check_chardev(&chr, s->pri_indev, errp) ||
   1279         !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) {
   1280         return;
   1281     }
   1282 
   1283     if (find_and_check_chardev(&chr, s->sec_indev, errp) ||
   1284         !qemu_chr_fe_init(&s->chr_sec_in, chr, errp)) {
   1285         return;
   1286     }
   1287 
   1288     if (find_and_check_chardev(&chr, s->outdev, errp) ||
   1289         !qemu_chr_fe_init(&s->chr_out, chr, errp)) {
   1290         return;
   1291     }
   1292 
   1293     net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
   1294     net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
   1295 
   1296     /* Try to enable remote notify chardev, currently just for Xen COLO */
   1297     if (s->notify_dev) {
   1298         if (find_and_check_chardev(&chr, s->notify_dev, errp) ||
   1299             !qemu_chr_fe_init(&s->chr_notify_dev, chr, errp)) {
   1300             return;
   1301         }
   1302 
   1303         net_socket_rs_init(&s->notify_rs, compare_notify_rs_finalize,
   1304                            s->vnet_hdr);
   1305     }
   1306 
   1307     s->out_sendco.s = s;
   1308     s->out_sendco.chr = &s->chr_out;
   1309     s->out_sendco.notify_remote_frame = false;
   1310     s->out_sendco.done = true;
   1311     g_queue_init(&s->out_sendco.send_list);
   1312 
   1313     if (s->notify_dev) {
   1314         s->notify_sendco.s = s;
   1315         s->notify_sendco.chr = &s->chr_notify_dev;
   1316         s->notify_sendco.notify_remote_frame = true;
   1317         s->notify_sendco.done = true;
   1318         g_queue_init(&s->notify_sendco.send_list);
   1319     }
   1320 
   1321     g_queue_init(&s->conn_list);
   1322 
   1323     s->connection_track_table = g_hash_table_new_full(connection_key_hash,
   1324                                                       connection_key_equal,
   1325                                                       g_free,
   1326                                                       NULL);
   1327 
   1328     colo_compare_iothread(s);
   1329 
   1330     qemu_mutex_lock(&colo_compare_mutex);
   1331     if (!colo_compare_active) {
   1332         qemu_mutex_init(&event_mtx);
   1333         qemu_cond_init(&event_complete_cond);
   1334         colo_compare_active = true;
   1335     }
   1336     QTAILQ_INSERT_TAIL(&net_compares, s, next);
   1337     qemu_mutex_unlock(&colo_compare_mutex);
   1338 
   1339     return;
   1340 }
   1341 
   1342 static void colo_flush_packets(void *opaque, void *user_data)
   1343 {
   1344     CompareState *s = user_data;
   1345     Connection *conn = opaque;
   1346     Packet *pkt = NULL;
   1347 
   1348     while (!g_queue_is_empty(&conn->primary_list)) {
   1349         pkt = g_queue_pop_tail(&conn->primary_list);
   1350         compare_chr_send(s,
   1351                          pkt->data,
   1352                          pkt->size,
   1353                          pkt->vnet_hdr_len,
   1354                          false,
   1355                          true);
   1356         packet_destroy_partial(pkt, NULL);
   1357     }
   1358     while (!g_queue_is_empty(&conn->secondary_list)) {
   1359         pkt = g_queue_pop_tail(&conn->secondary_list);
   1360         packet_destroy(pkt, NULL);
   1361     }
   1362 }
   1363 
   1364 static void colo_compare_class_init(ObjectClass *oc, void *data)
   1365 {
   1366     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
   1367 
   1368     ucc->complete = colo_compare_complete;
   1369 }
   1370 
   1371 static void colo_compare_init(Object *obj)
   1372 {
   1373     CompareState *s = COLO_COMPARE(obj);
   1374 
   1375     object_property_add_str(obj, "primary_in",
   1376                             compare_get_pri_indev, compare_set_pri_indev);
   1377     object_property_add_str(obj, "secondary_in",
   1378                             compare_get_sec_indev, compare_set_sec_indev);
   1379     object_property_add_str(obj, "outdev",
   1380                             compare_get_outdev, compare_set_outdev);
   1381     object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
   1382                             (Object **)&s->iothread,
   1383                             object_property_allow_set_link,
   1384                             OBJ_PROP_LINK_STRONG);
   1385     /* This parameter just for Xen COLO */
   1386     object_property_add_str(obj, "notify_dev",
   1387                             compare_get_notify_dev, compare_set_notify_dev);
   1388 
   1389     object_property_add(obj, "compare_timeout", "uint64",
   1390                         compare_get_timeout,
   1391                         compare_set_timeout, NULL, NULL);
   1392 
   1393     object_property_add(obj, "expired_scan_cycle", "uint32",
   1394                         compare_get_expired_scan_cycle,
   1395                         compare_set_expired_scan_cycle, NULL, NULL);
   1396 
   1397     object_property_add(obj, "max_queue_size", "uint32",
   1398                         get_max_queue_size,
   1399                         set_max_queue_size, NULL, NULL);
   1400 
   1401     s->vnet_hdr = false;
   1402     object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
   1403                              compare_set_vnet_hdr);
   1404 }
   1405 
   1406 void colo_compare_cleanup(void)
   1407 {
   1408     CompareState *tmp = NULL;
   1409     CompareState *n = NULL;
   1410 
   1411     QTAILQ_FOREACH_SAFE(tmp, &net_compares, next, n) {
   1412         object_unparent(OBJECT(tmp));
   1413     }
   1414 }
   1415 
   1416 static void colo_compare_finalize(Object *obj)
   1417 {
   1418     CompareState *s = COLO_COMPARE(obj);
   1419     CompareState *tmp = NULL;
   1420 
   1421     qemu_mutex_lock(&colo_compare_mutex);
   1422     QTAILQ_FOREACH(tmp, &net_compares, next) {
   1423         if (tmp == s) {
   1424             QTAILQ_REMOVE(&net_compares, s, next);
   1425             break;
   1426         }
   1427     }
   1428     if (QTAILQ_EMPTY(&net_compares)) {
   1429         colo_compare_active = false;
   1430         qemu_mutex_destroy(&event_mtx);
   1431         qemu_cond_destroy(&event_complete_cond);
   1432     }
   1433     qemu_mutex_unlock(&colo_compare_mutex);
   1434 
   1435     qemu_chr_fe_deinit(&s->chr_pri_in, false);
   1436     qemu_chr_fe_deinit(&s->chr_sec_in, false);
   1437     qemu_chr_fe_deinit(&s->chr_out, false);
   1438     if (s->notify_dev) {
   1439         qemu_chr_fe_deinit(&s->chr_notify_dev, false);
   1440     }
   1441 
   1442     colo_compare_timer_del(s);
   1443 
   1444     qemu_bh_delete(s->event_bh);
   1445 
   1446     AioContext *ctx = iothread_get_aio_context(s->iothread);
   1447     aio_context_acquire(ctx);
   1448     AIO_WAIT_WHILE(ctx, !s->out_sendco.done);
   1449     if (s->notify_dev) {
   1450         AIO_WAIT_WHILE(ctx, !s->notify_sendco.done);
   1451     }
   1452     aio_context_release(ctx);
   1453 
   1454     /* Release all unhandled packets after compare thead exited */
   1455     g_queue_foreach(&s->conn_list, colo_flush_packets, s);
   1456     AIO_WAIT_WHILE(NULL, !s->out_sendco.done);
   1457 
   1458     g_queue_clear(&s->conn_list);
   1459     g_queue_clear(&s->out_sendco.send_list);
   1460     if (s->notify_dev) {
   1461         g_queue_clear(&s->notify_sendco.send_list);
   1462     }
   1463 
   1464     if (s->connection_track_table) {
   1465         g_hash_table_destroy(s->connection_track_table);
   1466     }
   1467 
   1468     object_unref(OBJECT(s->iothread));
   1469 
   1470     g_free(s->pri_indev);
   1471     g_free(s->sec_indev);
   1472     g_free(s->outdev);
   1473     g_free(s->notify_dev);
   1474 }
   1475 
   1476 static void __attribute__((__constructor__)) colo_compare_init_globals(void)
   1477 {
   1478     colo_compare_active = false;
   1479     qemu_mutex_init(&colo_compare_mutex);
   1480 }
   1481 
   1482 static const TypeInfo colo_compare_info = {
   1483     .name = TYPE_COLO_COMPARE,
   1484     .parent = TYPE_OBJECT,
   1485     .instance_size = sizeof(CompareState),
   1486     .instance_init = colo_compare_init,
   1487     .instance_finalize = colo_compare_finalize,
   1488     .class_size = sizeof(CompareClass),
   1489     .class_init = colo_compare_class_init,
   1490     .interfaces = (InterfaceInfo[]) {
   1491         { TYPE_USER_CREATABLE },
   1492         { }
   1493     }
   1494 };
   1495 
   1496 static void register_types(void)
   1497 {
   1498     type_register_static(&colo_compare_info);
   1499 }
   1500 
   1501 type_init(register_types);