0

I have several soil moisture sensors logging moisture and temprature data at different depths in field. I am trying to reformat data from a CSV file into a new CSV file.

I have been able to get the code to seperate data into different columns without using the pd.read json function as i was having issues with this.

My current code is shown:

import csv

source_filename = source file here
target_filename = target file here

temp1 = []
def removeSegment(temp8,temp7):
    temp11 = ""
    temp9 = temp8.split(",")
    for temp10 in range(len(temp9)):
        if (temp7 not in temp9[temp10]):
            if (len(temp11)==0):
                temp11 = temp9[temp10]
            else:
                temp11 = temp11 + "," + temp9[temp10]
    return temp11
with open(source_filename) as myfile:
    myreader = csv.reader(myfile)
    for myrow in myreader:
        if ("sensor" in myrow[1]) and ("val" not in myrow[2]):
            temp1.append("sensor_id:"+myrow[1].split("/")[1]+","+myrow[2].replace('"','')[1:-1])
temp3 = dict()
for temp2 in range(len(temp1)):
    temp4 = temp1[temp2].split(",")
    for temp5 in range(len(temp4)):
        temp6 = temp4[temp5]
        if (temp6.split(":")[0] == "timestamp"):
            temp3[temp6.split(":")[1]] = removeSegment(temp1[temp2],"timestamp")
mywriter = open(target_filename, 'w')
mywriter.write('timestamp,sensor_id,m1,m2,t1,t2,t3,t4\n')
for temp12, temp13 in temp3.items():
    mywriter.write(temp12+",")
    temp15 = temp13.split(",")
    for temp14 in range(len(temp15)):
        mywriter.write(temp15[temp14].split(":")[1])
        if temp14 == len(temp15)-1:
            mywriter.write("\n")
        else:
            mywriter.write(",")
mywriter.close()

Here is a sample of the dataset in text format:

   receivedTS,topic,messageData
1629668287051,sensor/200301000000000000000000/501,"{""m1"":210,""m2"":359,""t1"":83,""t2"":87,""t3"":91,""t4"":96,""timestamp"":1629668261}"
1629667752828,sensor/200300000000000000000000/501,"{""m1"":285,""m2"":384,""t1"":82,""t2"":85,""t3"":86,""t4"":88,""timestamp"":1629667612}"
1629667748506,sensor/200294000000000000000000/501,"{""m1"":310,""m2"":406,""t1"":67,""t2"":73,""t3"":83,""t4"":83,""timestamp"":1629667609}"
1629667745098,sensor/200297000000000000000000/501,"{""m1"":198,""m2"":371,""t1"":80,""t2"":85,""t3"":86,""t4"":92,""timestamp"":1629667604}"
1629667741588,sensor/200303000000000000000000/501,"{""m1"":495,""m2"":483,""t1"":85,""t2"":90,""t3"":95,""t4"":97,""timestamp"":1629667600}"
1629667738060,sensor/200302000000000000000000/501,"{""m1"":499,""m2"":463,""t1"":102,""t2"":107,""t3"":106,""t4"":108,""timestamp"":1629667596}"
1629667734545,sensor/200296000000000000000000/501,"{""m1"":258,""m2"":358,""t1"":88,""t2"":93,""t3"":96,""t4"":95,""timestamp"":1629667592}"
1629667622587,sensor/200298000000000000000000/501,"{""m1"":381,""m2"":358,""t1"":77,""t2"":83,""t3"":86,""t4"":86,""timestamp"":1629667584}"
1629667614063,sensor/200187000000000000000000/501,"{""m1"":248,""m2"":386,""t1"":78,""t2"":85,""t3"":91,""t4"":87,""timestamp"":1629667580}"
1629667605605,sensor/200295000000000000000000/501,"{""m1"":219,""m2"":416,""t1"":85,""t2"":90,""t3"":88,""t4"":92,""timestamp"":1629667576}"
1629667598017,sensor/200301000000000000000000/501,"{""m1"":210,""m2"":359,""t1"":83,""t2"":87,""t3"":91,""t4"":96,""timestamp"":1629667573}"
1629666973230,sensor/200300000000000000000000/501,"{""m1"":285,""m2"":384,""t1"":82,""t2"":86,""t3"":87,""t4"":88,""timestamp"":1629666929}"
1629666969623,sensor/200294000000000000000000/501,"{""m1"":311,""m2"":407,""t1"":67,""t2"":73,""t3"":83,""t4"":83,""timestamp"":1629666925}"
1629666965979,sensor/200297000000000000000000/501,"{""m1"":198,""m2"":371,""t1"":80,""t2"":85,""t3"":86,""t4"":92,""timestamp"":1629666920}"
1629666961558,sensor/200303000000000000000000/501,"{""m1"":495,""m2"":483,""t1"":83,""t2"":90,""t3"":96,""t4"":97,""timestamp"":1629666916}"
1629666956216,sensor/200302000000000000000000/501,"{""m1"":499,""m2"":463,""t1"":102,""t2"":107,""t3"":107,""t4"":108,""timestamp"":1629666912}"
1629666952539,sensor/200296000000000000000000/501,"{""m1"":258,""m2"":358,""t1"":90,""t2"":93,""t3"":96,""t4"":96,""timestamp"":1629666908}"
1629666948098,sensor/200299000000000000000000/501,"{""m1"":248,""m2"":354,""t1"":77,""t2"":85,""t3"":88,""t4"":92,""timestamp"":1629666904}"
1629666936909,sensor/200298000000000000000000/501,"{""m1"":381,""m2"":359,""t1"":77,""t2"":83,""t3"":85,""t4"":86,""timestamp"":1629666900}"
1629666928507,sensor/200187000000000000000000/501,"{""m1"":248,""m2"":386,""t1"":78,""t2"":85,""t3"":91,""t4"":87,""timestamp"":1629666895}"
1629666920883,sensor/200295000000000000000000/501,"{""m1"":219,""m2"":416,""t1"":85,""t2"":90,""t3"":88,""t4"":92,""timestamp"":1629666891}"
1629666912323,sensor/200301000000000000000000/501,"{""m1"":210,""m2"":359,""t1"":83,""t2"":87,""t3"":91,""t4"":96,""timestamp"":1629666889}"
1629666383459,sensor/200300000000000000000000/501,"{""m1"":285,""m2"":384,""t1"":82,""t2"":85,""t3"":87,""t4"":90,""timestamp"":1629666244}"
1629666379819,sensor/200294000000000000000000/501,"{""m1"":310,""m2"":406,""t1"":67,""t2"":73,""t3"":83,""t4"":83,""timestamp"":1629666240}"
1629666375988,sensor/200297000000000000000000/501,"{""m1"":198,""m2"":371,""t1"":80,""t2"":85,""t3"":86,""t4"":91,""timestamp"":1629666236}"
1629666372369,sensor/200303000000000000000000/501,"{""m1"":495,""m2"":483,""t1"":83,""t2"":90,""t3"":96,""t4"":97,""timestamp"":1629666233}"
1629666368510,sensor/200302000000000000000000/501,"{""m1"":499,""m2"":463,""t1"":102,""t2"":107,""t3"":107,""t4"":108,""timestamp"":1629666228}"
1629666364688,sensor/200296000000000000000000/501,"{""m1"":259,""m2"":358,""t1"":90,""t2"":93,""t3"":96,""t4"":96,""timestamp"":1629666224}"
1629666360531,sensor/200299000000000000000000/501,"{""m1"":248,""m2"":354,""t1"":77,""t2"":85,""t3"":87,""t4"":92,""timestamp"":1629666219}"
1629666251578,sensor/200298000000000000000000/501,"{""m1"":381,""m2"":358,""t1"":77,""t2"":83,""t3"":85,""t4"":86,""timestamp"":1629666215}"
1629666243135,sensor/200187000000000000000000/501,"{""m1"":249,""m2"":386,""t1"":78,""t2"":85,""t3"":91,""t4"":87,""timestamp"":1629666210}"
1629666235557,sensor/200295000000000000000000/501,"{""m1"":219,""m2"":416,""t1"":85,""t2"":90,""t3"":88,""t4"":91,""timestamp"":1629666207}"
1629666226302,sensor/200301000000000000000000/501,"{""m1"":208,""m2"":359,""t1"":83,""t2"":87,""t3"":91,""t4"":95,""timestamp"":1629666204}"
1629665596217,sensor/200300000000000000000000/501,"{""m1"":285,""m2"":384,""t1"":82,""t2"":86,""t3"":86,""t4"":88,""timestamp"":1629665560}"
1629665592579,sensor/200294000000000000000000/501,"{""m1"":310,""m2"":406,""t1"":67,""t2"":75,""t3"":83,""t4"":83,""timestamp"":1629665553}"
1629665588137,sensor/200297000000000000000000/501,"{""m1"":198,""m2"":371,""t1"":80,""t2"":85,""t3"":86,""t4"":92,""timestamp"":1629665549}"
1629665582830,sensor/200303000000000000000000/501,"{""m1"":495,""m2"":483,""t1"":85,""t2"":90,""t3"":96,""t4"":97,""timestamp"":1629665546}"
1629665578596,sensor/200302000000000000000000/501,"{""m1"":499,""m2"":463,""t1"":103,""t2"":107,""t3"":107,""t4"":108,""timestamp"":1629665542}"
1629665575727,sensor/200296000000000000000000/501,"{""m1"":220,""m2"":416,""t1"":85,""t2"":90,""t3"":88,""t4"":92,""timestamp"":1629665519}"
1623887062762,transceiver/004900314E46500D2033334D/event/ex/scan,"{""sensors"":[{""id"":""200187000000000000000000"",""type"":""501"",""modAdr"":3,""fw"":""48.50"",""hw"":""15.15""}],""timestamp"":1623887071}"
1623887056853,transceiver/004900314E46500D2033334D/data/network,"{""c"":1,""d"":0,""n"":[""53001""]}"
1623887051515,transceiver/004900314E46500D2033334D/data/cell-strength,"{""rssi"":-61,""ber"":3,""timestamp"":1623887028}"
1623887045991,transceiver/004900314E46500D2033334D/data/mains-voltage,"{""mv"":24288,""timestamp"":1623886996}"
1623887040884,transceiver/004900314E46500D2033334D/data/battery-voltage,"{""mv"":13648,""timestamp"":1623886996}"
1623887030393,transceiver/004900314E46500D2033334D/event/boot,"{""rebootReason"":""0"",""version"":""17.501"",""mBoard"":""0.0.4"",""expBoard"":""0.0.7"",""sBoard"":""0.0.6"",""timestamp"":1623886993,""sensors"":[]}"
1623887017480,transceiver/004900314E46500D2033334D/event/netid,"{""imsi"":""204047116508564"",""imei"":""352909081612471""}"
1623886957737,transceiver/004900314E46500D2033334D/data/mains-voltage,"{""mv"":24280,""timestamp"":1623886908}"

All i need is to convert the timestamp into date/time format and create a new sheet for each different sensor ID but am unsure how to do this

Any help will be appreciated

OneCricketeer
  • 179,855
  • 19
  • 132
  • 245
  • 1
    The first argument to `pd.read_json` should be the filename. `data` is a tuple containing the filename and `r`. – Barmar Aug 23 '21 at 21:43
  • You also have transceiver data. Should that be discarded? – tdelaney Aug 23 '21 at 21:49
  • @OneCricketeer “Third column needs to be fixed” - It’s in CSV format; once read using e.g. the `csv` module the third column won’t be surrounded by “ will have single “. – DisappointedByUnaccountableMod Aug 23 '21 at 21:56
  • 1
    @OneCricketeer I’m referring to both. Reading as a CSV formatted file the third column value will become a string (the outer quotes) with single “ (the inner quotes). Try it yourself. – DisappointedByUnaccountableMod Aug 23 '21 at 22:01
  • I have edited the question with code to discard the transceiver data and separate the columns as required. I am unsure how to separate sensor IDs into new sheets and converting the timestamp to date/time format. Please see edited question. Any help would be appreciated :) – Shivash Mishra Aug 23 '21 at 22:17

2 Answers2

0

If you meant Excel "sheet", then you should use an Excel library, but here is a start for grouping the sensor data into individual files

import csv

sensor_data = dict()

with open('data.csv') as f:
  reader = csv.DictReader(f)
  for line in reader:
    sensor = line['topic']
    data = line['messageData']
    if sensor not in sensor_data:
      sensor_data[sensor] = list()
    sensor_data[sensor].append(line['receivedTS'])
    sensor_data[sensor].append(line['messageData'])

for sensor, data in sensor_data.items():
  # TODO: Replace with excel logic
  with open(sensor.replace('/', '_') + '.csv', 'a+') as f:
    writer = csv.DictWriter(f, fieldnames=['receivedTS', 'messageData'])
    writer.writeheader()
    writer.writerow({'receivedTS': data[0], 'messageData': data[1]})

convert the timestamp into date/time format

You are looking for datetime.strftime. Converting unix timestamp string to readable date


$ ls *.csv | xargs wc -l

  48 data.csv
   2 sensor_200187000000000000000000_501.csv
   2 sensor_200294000000000000000000_501.csv
   2 sensor_200295000000000000000000_501.csv
   2 sensor_200296000000000000000000_501.csv
   2 sensor_200297000000000000000000_501.csv
   2 sensor_200298000000000000000000_501.csv
   2 sensor_200299000000000000000000_501.csv
   2 sensor_200300000000000000000000_501.csv
   2 sensor_200301000000000000000000_501.csv
   2 sensor_200302000000000000000000_501.csv
   2 sensor_200303000000000000000000_501.csv
   2 transceiver_004900314E46500D2033334D_data_battery-voltage.csv
   2 transceiver_004900314E46500D2033334D_data_cell-strength.csv
   2 transceiver_004900314E46500D2033334D_data_mains-voltage.csv
   2 transceiver_004900314E46500D2033334D_data_network.csv
   2 transceiver_004900314E46500D2033334D_event_boot.csv
   2 transceiver_004900314E46500D2033334D_event_ex_scan.csv
   2 transceiver_004900314E46500D2033334D_event_netid.csv
OneCricketeer
  • 179,855
  • 19
  • 132
  • 245
0

pandas.DataFrame can be constructed from a list of dictionaries as long as those dicts all have the same keys. That's the case for sensors in your CSV. You can create a 3 column dataframe from the file, then do json.loads on the "messageData" to create a list of dictionaries. That becomes a new dataframe that you can join to your original dataframe to get the data you want. Finally, you can use the unque sensor names to create a collection of dataframes per sensor.

import pandas as pd
import json

df = pd.read_csv("test.csv")
# remove non-sensor data
df = df[df["topic"].str.startswith("sensor")]
# convert json message data to dataframe
data_df = pd.DataFrame(json.loads(data) for data in df["messageData"])
del df["messageData"]
# join for final df
df = df.join(data_df)
print(df.head(2))
# break into dataframes by unique sensor name
by_sensor = {sensor: df[df['topic'] == sensor] for sensor in df['topic'].unique()}

for k,v in by_sensor.items():
    print(f"--- {k} ---")
    print(v)
tdelaney
  • 73,364
  • 6
  • 83
  • 116