0

I am trying to print all the occurrence of duplicates in the linked-list, but I'm not able to print the last one.

I have also tried to store last duplicate string ptr1->name in a temporary variable and print it after while loop ends but it doesn't work with different pair of values, only the last one, also I cannot find a way to print the location of last duplicate string.

If there's no better way to print the last duplicate strings, how I can ignore only first occurrences ?

Example:

List :

1 7001 Sanjana
2 7014 Aishwarya
3 7025 Ranjit
4 7017 Gurudas
5 7101 Deeksha
6 7023 Indu
7 7017 Gurudas
8 7001 Sanjana
9 7017 Gurudas
10 7016 Kiran
11 7020 Rajni
12 7020 Rajni

Output Desired :

1 7001 Sanjana
4 7017 Gurudas
7 7017 Gurudas -- Dup
8 7001 Sanjana -- Dup
9 7017 Gurudas -- Dup
11 7020 Rajni
12 7020 Rajni -- Dup

Edit:

Found Solution with help of @peal-mazumder

Solution:

void duplicate(void){
    int count = 0;
    int loc1 = 0;  // 1st Pointer location
    int loc2 = 0;  // 2nd Pointer location
    struct node * ptr1, * ptr2;
    bool vis[5000] = {false}; 

    ptr1 = root;

    while (ptr1!=NULL){
    loc1++;

    // if node is already marked for duplicate it will skip compare and move to next node.

        if (vis[loc1]) {
            ptr1 = ptr1->link;
            continue;
        }
        ptr2 = ptr1->link;
        loc2 = loc1;
        while(ptr2!=NULL){
        loc2++;

            if ((ptr1->num == ptr2->num) && !(strcmpi(ptr1->name, ptr2->name))){
                count++;
                if (count == 1) printf("Duplicate Search Results: \n");

                // delete below if statement if original not to be printed.

                if (!vis[loc1]) printf(" %d--> %02d %s\n",loc1, ptr1->num, ptr1->name); // print original

                printf(" %d--> %02d %s --> Dup\n", loc2, ptr2->num, ptr2->name); // print duplicate
                vis[loc2] = true; // marked for duplicate.
                vis[loc1] = true; // not required if original not to be printed.
            }

            ptr2 = ptr2->link;
        }
        ptr1 = ptr1->link;
    }

    if (!count) {
        printf("No duplicates found in the list!\n");
    }else{
        printf("Total (%d) duplicates.\n", count);
    }
}

Output :

 1--> 7001 Sanjana
 8--> 7001 Sanjana --> Dup
 4--> 7017 Gurudas
 7--> 7017 Gurudas --> Dup
 9--> 7017 Gurudas --> Dup
 11--> 7020 Rajni
 12--> 7020 Rajni --> Dup
Total (4) duplicates.
gaurav.cb
  • 3
  • 3
  • Change `while (ptr1->link!=NULL){` into `while (ptr1!=NULL){`. – 500 - Internal Server Error Mar 12 '22 at 22:36
  • I've changed that but it seems to work the same as if nothing changed. My problem is that if there are two similar values or say three, only one and two of them will print respectively. Eg. from "11 7020 Rajni" & "12 7020 Rajni" , only "11 7020 Rajni" will be printed. I want both. – gaurav.cb Mar 13 '22 at 00:26
  • This is probably the worst case run-time for a linked-list. If you could use a hash-map or equivalent, (a histogram,) it would be `O(n)`. – Neil Mar 13 '22 at 02:42
  • Hi @Neil , I'm a beginner with both C and linked-list, I did see a hash-map solution on geekforgeek, but I'm far away from getting the hold of it yet :( also I needed a way to print the list along with the location of each node. – gaurav.cb Mar 13 '22 at 21:17

2 Answers2

0
8 --> 7001 Sanjana
9 --> 7017 Gurudas
12 --> 7020 Rajni
Total (3) duplicates.

If you are trying to print something like that then check below code otherwise add your required output for your given Example Case on your description.

struct node {
    int num;
    char name[20];
    struct node* link;
} * root;


// function

void duplicate(void){
    int count = 0;
    int loc = 0; // Location in the list.
    struct node * ptr1, * ptr2;
    bool vis[200000] = {false};
    ptr1 = root; // root is global pointer variable.
    while (ptr1->link!=NULL) {
        loc++;
        if(vis[ptr1->num]) { // if i already processed for same num value then we don't need to check duplicate for this again.
            ptr1 = ptr1->link;
            continue;
        }
        vis[ptr1->num] = true;
        ptr2 = ptr1->link;
        int Ptr2Location = loc + 1, lastDupLocation = 0;
        struct node *temp; // to store last duplicate node
        while(ptr2 != NULL) {
            if ((ptr1->num == ptr2->num) && !(strcmpi(ptr1->name, ptr2->name))){
                temp = ptr2;
                lastDupLocation = Ptr2Location;
            }
            Ptr2Location++;
            ptr2 = ptr2->link;
        }
        if(lastDupLocation)
            printf(" %d --> %02d %s\n", lastDupLocation, temp->num, temp->name), count++;
        ptr1 = ptr1->link;
    }

    if (!count) {
        printf("No duplicates found in the list!\n");
    }else {
        printf("\nTotal (%d) duplicates.\n", count);
    }
}
void insert(int num, char str[20], int len)
{
    if(root == NULL) { //If the list is empty
        root = new node();
        root->num = num;
        strcpy(root->name, str);
        root->link = NULL;
    }
    else{
        node* current_node = root; //make a copy of root node
        while(current_node->link!=NULL) //Find the last node
        {
            current_node=current_node->link; //go to next address
        }
        node *newnode = new node(); //create a new node
        newnode->num = num;
        strcpy(newnode->name, str);
        newnode->link = NULL;

        current_node->link=newnode; //link the last node with new node
}

}

int main()
{
    int num, n, i;
    cin>>n;
    char name[20];
    for (i = 0; i<n; i++)
    {
        scanf("%d %s", &num, name);
        insert(num, name, strlen(name));
    }
    duplicate();
    return 0;
}

You can sort this according to their location by storing these values in a data structure or a structure.

1 --> 7001 Sanjana
8 --> 7001 Sanjana
4 --> 7017 Gurudas
7 --> 7017 Gurudas
9 --> 7017 Gurudas
11 --> 7020 Rajni
12 --> 7020 Rajni

Code

void duplicate(void){
    int count = 0;
    int loc = 0; // Location in the list.
    struct node * ptr1, * ptr2;
    bool vis[200000] = {false};
    ptr1 = root; // root is global pointer variable.
    while (ptr1->link!=NULL) {
        loc++;
        if(vis[ptr1->num]) { // if i already processed for same num value then we don't need to check duplicate for this again.
            ptr1 = ptr1->link;
            continue;
        }
        ptr2 = ptr1->link;
        int Ptr2Location = loc + 1;
        while(ptr2 != NULL) {
            if ((ptr1->num == ptr2->num) && !(strcmpi(ptr1->name, ptr2->name))){
                if(!vis[ptr1->num])
                    printf(" %d --> %02d %s\n", loc, ptr1->num, ptr1->name), count++;
                printf(" %d --> %02d %s\n", Ptr2Location, ptr2->num, ptr2->name);
                vis[ptr1->num] = true;
            }
            Ptr2Location++;
            ptr2 = ptr2->link;
        }
        ptr1 = ptr1->link;
    }
}
  • Thank you Peal for working out a solution for me. I've been learning C for couple months. I tried to edit the Question after posting it but the site went down for maintenance. Apologies for trouble. For an output, I wish to print all the duplicate values or skip the first appearance only and print the rest. Eg. duplicate A, B, C, E print All of them or B, C, E only. – gaurav.cb Mar 13 '22 at 17:45
  • @gaurav.Cb I edited my answer. Please recheck it. If you think it helped you, then please upvote it. Happy Coding. – Peal Mazumder Mar 13 '22 at 19:41
  • Hi @peal-mazumder, Thank you so much, you made my day... I came here after finding a solution by myself practicing your `bool[]` array. I didn't know about stdbool.h header. I see that you've also solved it, I will take it for an answer :) Only difference is that I'm using `vis[loc2] / vis[loc1]` logic because I'm comparing num && name. I will edit my question with my version of solution if you'd like to see it :) – gaurav.cb Mar 13 '22 at 20:38
0

less practical answer

duplicate is an O(n^2) function; that is, one loops over ptr1, and for each value, nestled loop over ptr2. This is common in a linked-list.

A more efficient method is hashing, that is, converting the key into a number called a hash [code]. A hash table stores the records by their hash. Seeing this data, I would assume that name depends on num, so num can be your key. If there is guarantee that num falls in, say, 7000 -- 7099, one could have a collision-free perfect hash funcion for this specific data, H(x) = x.num - 7000; O(n) speed.

For example, H(1 7001 Sanjana) = 1 -> store the record at table[1]. Get to H(8 7001 Sanjana) = 1; table[1] is occupied, so one knows immediately that it's a duplicate.

But why even bother with the data at all? One can have O(n) run-time and O(1) space requirement if one doesn't mind a (small) chance of false-positives with a probabilistic Bloom filter.

#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Define a maximum name length and have the C pre-processor encode this in a
 string automatically. */
#define NAME_LENGTH 31
#define QUOTE(n) XQUOTE(n)
#define XQUOTE(n) #n

/** <https://stackoverflow.com/a/12996028/2472827> */
static uint32_t hash(uint32_t x) {
    x = ((x >> 16) ^ x) * 0x45d9f3b;
    x = ((x >> 16) ^ x) * 0x45d9f3b;
    x = (x >> 16) ^ x;
    return x;
}

/** Thomas Wang's 32 bit Mix Function. */
static uint32_t inthash(uint32_t key) {
    key += ~(key << 15);
    key ^=  (key >> 10);
    key +=  (key << 3);
    key ^=  (key >> 6);
    key += ~(key << 11);
    key ^=  (key >> 16);
    return key;
}

/** <https://nullprogram.com/blog/2018/07/31/> */
static uint32_t lowbias32(uint32_t x) {
    x ^= x >> 16;
    x *= 0x7feb352dU;
    x ^= x >> 15;
    x *= 0x846ca68bU;
    x ^= x >> 16;
    return x;
}

int main(void) {
    uint64_t bloom = 0;
    size_t scans = 0, count = 0;
    unsigned no, id;
    char name[NAME_LENGTH + 1], lf;
loop:
    switch(scanf("%u %u %" QUOTE(NAME_LENGTH) "s%c", &no, &id, name, &lf)) {
    case EOF: if(feof(stdin)) break; else goto catch;
    case 0: case 1: case 2: case 3: errno = EILSEQ; goto catch;
    case 4:
        if(lf != '\n') { errno = EILSEQ; goto catch; }
        else {
            const uint32_t x = (uint32_t)id;
            const uint64_t xvect = (1 << (hash(x) & 63))
                | (1 << (inthash(x) & 63)) | (1 << (lowbias32(x)));
            if((bloom & xvect) == xvect) // print (possible) duplicate
                printf(" %u %u %s --> (possible) Dup\n", no, id, name), count++;
            else // marked for duplicate
                bloom |= xvect;
        }
        scans++;
        goto loop;
    }
    if (!count) {
        printf("No duplicates found in the %zu list!\n", scans);
    }else{
        printf("Total %zu/%zu (possible) duplicates.\n", count, scans);
    }
    return EXIT_SUCCESS;
catch:
    fprintf(stderr, "While scanning record %zu: %s.\n", scans, strerror(errno));
    return EXIT_FAILURE;
}

7 7017 Gurudas --> (possible) Dup 8 7001 Sanjana --> (possible) Dup 9 7017 Gurudas --> (possible) Dup 12 7020 Rajni --> (possible) Dup Total 4/12 (possible) duplicates.

Neil
  • 1,767
  • 2
  • 16
  • 22