Assuming the format is the same all along (i.e all splits starts with the string "Datetime
"), you can get the index of where the string starts with "Datetime"
, and chose all data between each splits:
import pandas as pd
data = pd.Series(["Datetime stamp 1",
"Obs1",
"Obs2",
"Obs3",
"Datetime stamp 2",
"Obs1",
"Obs2",
"Obs3"])
#Get splits
idx_split =data.str.startswith("Datetime ")
idx_split = idx_split.index[idx_split] # [0,4]
N_COLS = len(idx_split) #number of columns
vals = [0]*N_COLS #Initialize values
#Loop over each split-index and slize data
for i in range(N_COLS-1):
vals[i] = list(data[idx_split[i]:idx_split[i+1]])
vals[i+1] = list(data[idx_split[-1]:]) #Get the last one
print(vals)
#[['Datetime stamp 1', 'Obs1', 'Obs2', 'Obs3'],
#['Datetime stamp 2', 'Obs1', 'Obs2', 'Obs3']]
#Get the first element from each list and use that as column name
# + remove it
cols = [p.pop(0) for p in vals]
#The data list is in wrong shape for pandas, use https://stackoverflow.com/questions/6473679/transpose-list-of-lists to transpose the list to right shape
df = pd.DataFrame(list(map(list, zip(*vals))),columns = cols)
print(df)
#Datetime stamp 1 Datetime stamp 2
#0 Obs1 Obs1
#1 Obs2 Obs2
#2 Obs3 Obs3