Let's say that we've read a Python file containing multiple lines of comments and then some code. Its lines are stored in `data` as a list or `np.ndarray`:
# Example input: one list element per line of the file.
# NOTE: every element needs its own comma; adjacent string literals are silently concatenated.
data = ["# this", "# is", "# the first comment", "print('hello world')", "# second comment"]
expected_output = ["```this is the first comment```", "print('hello world')", "``` second comment```"]
expected_output
The desired output replaces each run of consecutive elements starting with a `#` character with a single parsed comment wrapped in triple backticks:
['```this is the first comment```',
"print('hello world')",
'``` second comment```']
I can do the parsing, but I don't know how to replace the individual lines with the newly formatted single line (e.g. indices [0, 1, 2] in the example above).
The script so far:
from pathlib import Path
import numpy as np
from itertools import groupby
from operator import itemgetter
def get_consecutive_group_edges(data: np.ndarray) -> dict:
    """Find the runs of consecutive integers in *data*.

    Args:
        data: Integer indices in ascending order. Either a flat sequence or
            the ``(N, 1)`` column array that ``np.argwhere`` returns for a
            one-dimensional mask.

    Returns:
        A dict mapping a running group number (0, 1, ...) to a
        ``(first, last)`` tuple holding the inclusive edges of each run.
        Empty input gives an empty dict.
    """
    # Flatten first: np.argwhere yields one-element rows, and converting such
    # rows with int() is deprecated in recent NumPy releases.
    values = [int(value) for value in np.asarray(data).ravel()]

    # https://stackoverflow.com/a/2154437/9940782
    # Within a run of consecutive integers, (position - value) is constant,
    # so grouping on that difference separates the runs.
    edges = []
    for _, run in groupby(enumerate(values), key=lambda pair: pair[0] - pair[1]):
        members = [value for _, value in run]
        edges.append((members[0], members[-1]))

    # convert ranges into group index
    # https://stackoverflow.com/a/952952/9940782
    return dict(enumerate(edges))
if __name__ == "__main__":
    # https://stackoverflow.com/a/17141572/9940782
    # Stand-in for the lines of the file; each line is its own list element
    # (a missing comma would silently concatenate two neighbouring literals).
    filedata = ["# this", "# is", "# the first comment", "print('hello world')", "# second comment"]

    # find all consecutive lines starting as comments
    # str.startswith is used rather than line[0] so an empty line cannot raise IndexError
    comment_lines = np.argwhere([line.startswith("#") for line in filedata])
    group_lookup = get_consecutive_group_edges(comment_lines)

    output_lines = []
    for comment_idx in group_lookup:
        # extract the comment groups
        min_comment_line = group_lookup[comment_idx][0]
        # the stored upper edge is inclusive, slicing needs an exclusive bound
        max_comment_line = group_lookup[comment_idx][1] + 1
        data = filedata[min_comment_line: max_comment_line]

        # remove the comment characters
        output = "".join(data).replace("\n", " ").replace("#", "")

        # wrap in ```
        output = "```" + output + "```" + "\n"
        # NOTE: `output` is not yet written back in place of
        # filedata[min_comment_line:max_comment_line]; output_lines stays empty.
I am failing at the final step: how do I replace all of the values between `min_comment_line` and `max_comment_line` for each group with the single, newly parsed `output`?
Can I do something with the non-commented lines?
# Indices of the lines that are not comments. str.startswith is used instead of
# indexing the first character so that an empty line does not raise IndexError.
non_comment_lines = np.argwhere([not line.startswith("#") for line in filedata])