I would like to keep matching ticker symbols in both. I am trying to scrape a few scanners and get a list of symbols that are in both. I originally tried Beautiful soup, but found it a little bit easier to use pandas. I am still learning to use pandas and python and thought this would be a good project.
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import numpy as np
from google.colab import drive
#def New_highs():
Default_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36'} # This is chrome, you can set whatever browser you like
New_highurl="https://stockcharts.com/def/servlet/SC.scan?s=TSAL[t.t_eq_s]![as0,20,tv_gt_40000]![th0_gt_am1,253,th]&report=predefall"
response= requests.get(New_highurl,headers=Default_headers)
data = response.text
soup= bs(data,'lxml') # parses the html tags using BeautifulSoup
#read panda url
dfs=pd.read_html(data,header=0,index_col=0)
NH_df=dfs[0]
##lists of arrays for header data
#convert to csv
df.to_csv('charts_ticker_new_high.csv')
#return NH_df
#def New_CCI():
Default_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36'} # This is chrome, you can set whatever browser you like
CCI_url="https://stockcharts.com/def/servlet/SC.scan?s=TSAL[t.t_eq_s]![as0,20,tv_gt_40000]![bu0,20_gt_100]![bu1,20_le_100]![bu2,20_lt_100]&report=predefall"
response= requests.get(CCI_url,headers=Default_headers)
data = response.text
soup= bs(data,'lxml') # parses the html tags using BeautifulSoup
#read panda url
dfs=pd.read_html(data,header=0,index_col=0)
CCI_df=dfs[0]
##lists of arrays for header data
#convert to csv
CCI_df.to_csv('charts_ticker_new_CCI.csv')
# return CCI_df
#New_highs()
#New_CCI()
matching= NH_df.Symbol == CCI_df.Symbol
print(matching)
an example would be from dataframe1 and from df 2 it should output only GROW Symbol Symbol AAPL GROW GROW CAMP SPCB BAND
I also get the following errors /usr/local/lib/python3.6/dist-packages/pandas/core/ops/init.py in wrapper(self, other) 363 364 if isinstance(other, ABCSeries) and not self._indexed_same(other): --> 365 raise ValueError("Can only compare identically-labeled Series objects") 366 367 lvalues = extract_array(self, extract_numpy=True)
ValueError: Can only compare identically-labeled Series objects