4

Hi, I would like an int and a float example in which writing to a non-atomic value causes tearing. I can't seem to reproduce this. Either it is extremely rare or I'm doing something wrong.

Here is my test code which never prints. Is there anything wrong with it?

#include <windows.h>
#include <tchar.h>
#include <strsafe.h>

#define MAX_THREADS 64
#define BUF_SIZE 255

DWORD WINAPI MyThreadFunction( LPVOID lpParam );
void ErrorHandler(LPTSTR lpszFunction);

// Example payload type: two ints bundled together so they could be handed to
// a worker thread through the single LPVOID argument of its entry point.
// NOTE(review): the visible code creates its threads with a NULL argument and
// never instantiates this type.
struct MyData {
    int val1;
    int val2;
};
typedef MyData  MYDATA;
typedef MyData* PMYDATA;


int _tmain()
{
    DWORD   dwThreadIdArray[MAX_THREADS];
    HANDLE  hThreadArray[MAX_THREADS]; 

    // Create MAX_THREADS worker threads.

    for( int i=0; i<MAX_THREADS; i++ )
    {
        // Allocate memory for thread data.
        // Create the thread to begin execution on its own.

        hThreadArray[i] = CreateThread( 
            NULL,                   // default security attributes
            0,                      // use default stack size  
            MyThreadFunction,       // thread function name
            NULL,                   // argument to thread function 
            0,                      // use default creation flags 
            &dwThreadIdArray[i]);   // returns the thread identifier 


        // Check the return value for success.
        // If CreateThread fails, terminate execution. 
        // This will automatically clean up threads and memory. 

        if (hThreadArray[i] == NULL) 
        {
            ErrorHandler(TEXT("CreateThread"));
            ExitProcess(3);
        }
    } // End of main thread creation loop.

    // Wait until all threads have terminated.

    WaitForMultipleObjects(MAX_THREADS, hThreadArray, TRUE, INFINITE);

    // Close all thread handles and free memory allocations.

    for(int i=0; i<MAX_THREADS; i++)
    {
        CloseHandle(hThreadArray[i]);
    }

    return 0;
}

#pragma pack(push, 1)
// Byte-packed record used as the shared target of the tearing experiment.
// With 1-byte packing the three leading chars put `test` at offset 3, so the
// int is deliberately not 4-byte aligned relative to the start of the struct.
// NOTE(review): whether `test` actually straddles a cache line depends on
// where the allocator places the object; nothing here forces that.
struct Test
{
    char x1;
    char x2;    // was a second `x1`, which is a redeclaration and does not compile
    char x3;
    int test;   // the value every worker thread writes and reads back
    char x4;
    char x5;
};

// Single instance shared by all worker threads; value-initialised to zero.
Test* t = new Test(); //This is test code don't care about allocation or that it is a global.
#pragma pack(pop)

// Worker thread entry point for the tearing experiment.
//
// Announces itself on the console, then repeatedly stores three known values
// into the shared `t->test` and reads it back, reporting any value that is
// none of the three (i.e. a mix of bytes from different stores).
//
// lpParam: unused.
// Returns 1 if no console output handle is available, otherwise 0.
DWORD WINAPI MyThreadFunction( LPVOID lpParam ) 
{ 
    HANDLE hStdout;

    TCHAR msgBuf[BUF_SIZE];
    size_t cchStringSize;
    DWORD dwChars;

    (void)lpParam;

    // Make sure there is a console to receive output results. 

    hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
    if( hStdout == INVALID_HANDLE_VALUE )
        return 1;

    // All workers run this function concurrently, so the counter is bumped
    // with an interlocked operation instead of a plain ++ on a shared int.
    static volatile LONG threadCounter = 0;
    const int threadNumber = static_cast<int>(InterlockedIncrement(&threadCounter));
    StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Starting thread, %d\n"), threadNumber); 
    StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
    WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);

    // Go through a volatile pointer so every store and the read-back are
    // really performed. With a plain int the compiler may drop the first two
    // stores and substitute the last constant for `result`, which would make
    // the check below impossible to trigger.
    volatile int* const shared = &t->test;

    *shared = 1;

    for (int i=0; i<1000000000;++i)
    {
        *shared = 1;
        *shared = 10000;
        *shared = 10000000;

        const int result = *shared;

        // Any other value means the read observed bytes from two stores.
        if(result != 1 && result != 10000 && result != 10000000)
        {
            StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Tearing occured = %d\n"), result); 
            StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
            WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);
        }
    }


    return 0; 
} 



// Shows a message box describing the calling thread's last-error code.
//
// lpszFunction: name of the API that failed; it is inserted into the message.
//
// The text has the form "<function> failed with error <code>: <system text>".
// If the system text cannot be retrieved a fixed placeholder is used, and if
// the display buffer cannot be allocated no message box is shown.
void ErrorHandler(LPTSTR lpszFunction) 
{ 
    // Capture the error code first, before any other API call can replace it.
    DWORD dw = GetLastError(); 

    // Start from NULL so a failed FormatMessage never leaves an
    // indeterminate pointer to be read or freed below.
    LPVOID lpMsgBuf = NULL;
    LPVOID lpDisplayBuf;

    DWORD cchMessage = FormatMessage(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | 
        FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        dw,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        (LPTSTR) &lpMsgBuf,
        0, NULL );

    LPCTSTR lpszMessage = (cchMessage != 0 && lpMsgBuf != NULL)
        ? (LPCTSTR) lpMsgBuf
        : TEXT("(no system message available)");

    // Display the error message. The extra 40 characters leave room for the
    // fixed text and the numeric code in the format string.

    lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, 
        (lstrlen(lpszMessage) + lstrlen((LPCTSTR) lpszFunction) + 40) * sizeof(TCHAR)); 
    if (lpDisplayBuf != NULL)
    {
        StringCchPrintf((LPTSTR)lpDisplayBuf, 
            LocalSize(lpDisplayBuf) / sizeof(TCHAR),
            TEXT("%s failed with error %d: %s"), 
            lpszFunction, dw, lpszMessage); 
        MessageBox(NULL, (LPCTSTR) lpDisplayBuf, TEXT("Error"), MB_OK); 
    }

    // Free error-handling buffer allocations (either pointer may be NULL).

    LocalFree(lpMsgBuf);
    LocalFree(lpDisplayBuf);
}
  • You can't reproduce this because for 32 bit access it's not reproducible on Intel platforms. Atomic variables solve a different problem that you don't test for: the visibility of changes preceding and succeeding the access to the atomic value. For example, if you implemented a spinlock using non-atomic 32 bit access, without memory fences, you'd run into trouble. – Kuba hasn't forgotten Monica Sep 11 '14 at 18:43
  • In order to observe torn 32 bit reads or writes on an Intel platform you will at least need an unaligned access that crosses a cache line boundary or possibly even a page boundary. In general torn 32 bit reads or writes will not happen with normal alignment on Intel platforms; I'm not sure under what circumstances, if any, they can be triggered. – mattnewport Sep 11 '14 at 18:52

2 Answers2

4

I can trigger torn reads / writes with this test code which forces the contended uint32_t to cross a cache line boundary when compiled with Visual Studio 2013 (only seems to happen in Release builds):

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

using namespace std;

// Set once by the first thread that detects a torn value; every writer polls
// it and stops when it becomes true. Direct-initialised because
// `atomic<bool> x = false;` is copy-initialisation, which is ill-formed
// before C++17 (std::atomic is not copyable).
atomic<bool> gDone(false);

// One distinctive bit pattern per writer thread. A value read back that is
// not in this list must be a mix of two different writes.
vector<uint32_t> vals = {0x11111111, 0x22222222, 0x33333333, 0x44444444, };

// Serialises console output from the writer threads.
mutex ioMutex;

void writeVal(volatile uint32_t* pVal, int tid) {
    while (!gDone) {
        *pVal = vals[tid];
        const auto currentVal = *pVal;
        auto findIt = find(begin(vals), end(vals), currentVal);
        if (findIt == end(vals)) {
            unique_lock<mutex> ul(ioMutex);
            cout << "Detected torn read/write! pVal = 0x" << setbase(16) << setw(8) << setfill('0')
                 << reinterpret_cast<uintptr_t>(pVal) << " currentVal = 0x" << currentVal << endl;
            gDone = true;
        }
    }
}

int main() {
    vector<char> memVec(16 * 1024);
    char* first = &memVec[0];
    const auto cacheLineSize = 64;
    char* crossesCacheLine =
        reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(first + cacheLineSize) & ~(cacheLineSize - 1)) - 2);
    uint32_t* tearableUint32 = reinterpret_cast<uint32_t*>(crossesCacheLine);
    vector<thread> threads(vals.size());
    for (int i = 0; i != threads.size(); ++i) {
        threads[i] = thread([=] { writeVal(tearableUint32, i); });
    }
    for (auto& t : threads) {
        t.join();
    }
}

Output:

Detected torn read/write! pVal = 0x004bc43e currentVal = 0x11112222
mattnewport
  • 13,728
  • 2
  • 35
  • 39
  • This is great. I haven't been able to run it without it crashing. I assume it's something VS2012 is missing. writeVal is getting a huge value for i. I'll look into factoring out the inline code. – user1335325 Sep 11 '14 at 22:50
  • Ok a little more info... I think void writeVal(volatile uint32_t* pVal, int tid) should be void writeVal(volatile uint32_t* pVal, volatile int tid); but this is still crashing for me in VS2012 reporting this is an invalid access: *pVal = vals[tid]; – user1335325 Sep 12 '14 at 04:19
  • Ok that last issue was caused by a change I made for 2012. I also switched in InterlockedExchange and confirmed that it solved the problem. Thanks very much... this will help me out a lot. – user1335325 Sep 12 '14 at 06:41
0

FWIW, this is just additional info for the previous answer, someone with higher stackoverflow privileges than me may just move it to comments for previous question.

I just checked the addresses that actually give tearing, and as expected, the address is 62 bytes into the cache line, so the 32-bit value gets written to the last two bytes of one cache line and to the first two bytes of another. See gdb output below.

alapaa@hilbert:~/src/stackoverflow$ g++ -g -std=c++0x tear.cpp -pthread -o tear  

alapaa@hilbert:~/src/stackoverflow$ ./tear  

Detected torn read/write! pVal = 0x00c0503e currentVal = 0x33331111  
Detected torn read/write! pVal = 0x00c0503e currentVal = 0x44441111  
alapaa@hilbert:~/src/stackoverflow$ gdb  
GNU gdb (Ubuntu 7.7-0ubuntu3.1) 7.7  
Copyright (C) 2014 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word".

(gdb) p 0x00c0503e % 64  
$1 = 62  
Erik Alapää
  • 2,585
  • 1
  • 14
  • 25