
[crawling] requests - emissions allowance prices

BOTTLE6 2022. 6. 6. 00:02

Reference blog: https://wg-cy.tistory.com/54

### https://blog.naver.com/ellijahbyeon/222213048898
import requests
import pandas as pd
import numpy as np
from io import BytesIO
import time
import datetime
from tqdm import tqdm
import os
import matplotlib.pyplot as plt  # needed for the charts at the end
### generate.cmd 
### payload - Form Data
'''
locale: ko_KR
trdDd: 20220603
share: 1
money: 1
csvxls_isNo: false
name: fileDown
url: dbms/MDC/STAT/standard/MDCSTAT15601
'''
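The scrape follows KRX's two-step OTP pattern: request a one-time code from generate.cmd with the form data above, then POST that code to the download endpoint to receive the CSV. The generate step can be sanity-checked on its own; a minimal sketch (fill in your own User-Agent string where the placeholder is):

# Quick check of the OTP step: the response body is the one-time code
# that download.cmd later expects in its 'code' form field.
otp = requests.get(
    'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd',
    params={'locale': 'ko_KR', 'trdDd': '20220603', 'share': '1',
            'money': '1', 'csvxls_isNo': 'false', 'name': 'fileDown',
            'url': 'dbms/MDC/STAT/standard/MDCSTAT15601'},
    headers={'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader',
             'User-Agent': '#############'},
)
print(otp.status_code, otp.content[:40])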
def kau_basic(tdate):
    #### generate
    gen_req_url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
    query_str_parms = {
        'locale': 'ko_KR',
        'trdDd': str(tdate),
        'share': '1',
        'money': '1',
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT15601'
    }
    headers = {
        'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader', # makes the request look like it came from the KRX data page
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': '#############' # find this in the generate.cmd request in your browser's dev tools and paste it here
    }
    r = requests.get(gen_req_url, query_str_parms, headers=headers)  # response body is a one-time code (OTP)
    #### download
    gen_req_url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
    form_data = {
        'code': r.content  # pass the OTP straight through
    }
    r = requests.post(gen_req_url, form_data, headers=headers)
    df = pd.read_csv(BytesIO(r.content), encoding='cp949')  # KRX serves CP949-encoded CSV
    df['일자'] = tdate
    file_name = 'kau_'+ str(tdate) + '.csv'
    df.to_csv(path+file_name, index=False, index_label=None, encoding='cp949')
    print('KAU crawling completed :', tdate)
    return
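kau_basic will raise if KRX returns an empty or non-CSV body (non-trading days, impossible calendar dates from the loop below, a network hiccup). A hedged wrapper that logs and skips such dates instead of aborting the multi-year loop; kau_safe is my own helper name, not from the original post, and can be swapped in for kau_basic below:

def kau_safe(tdate):
    # Skip dates where the download or CSV parsing fails,
    # rather than letting one bad day stop the whole crawl.
    try:
        kau_basic(tdate)
    except Exception as e:
        print('skipped', tdate, ':', e)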
# Save one file per date
path = 'C:/python/*****/kau/' #### save path for the price data (masked in the post)
for year in tqdm(range(2021, 2023)):
    for month in range(1, 13):
        for day in range(1, 32):
            tdate = year * 10000 + month * 100 + day
            if tdate <= int(datetime.datetime.now().strftime("%Y%m%d")):
                kau_basic(tdate)
                time.sleep(0.05)
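Note that the triple loop also emits impossible dates such as 20210230; they pass through harmlessly only because trdDd is sent as text and KRX returns nothing for them. As an alternative sketch, pandas can iterate over real business days only (Korean holidays still slip through, but KRX simply returns no rows for those):

for d in pd.bdate_range('2021-01-01', datetime.datetime.now()):
    kau_basic(int(d.strftime('%Y%m%d')))
    time.sleep(0.05)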
# Merge the daily files
kau_list = os.listdir(path)
WIP = pd.read_csv(path + kau_list[0], encoding='cp949') # seed with the file for the start date
for i in tqdm(kau_list[1:]):
    tomorrow = pd.read_csv(path + i, encoding='cp949')
    WIP = pd.concat([WIP, tomorrow], sort=False, ignore_index=True)
WIP = WIP.drop_duplicates()
WIP
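The same merge can also be written in one pass with glob; a sketch assuming the directory holds only the kau_*.csv files written above:

import glob
# Read every saved CSV and concatenate in a single call.
files = sorted(glob.glob(path + 'kau_*.csv'))
WIP = pd.concat((pd.read_csv(f, encoding='cp949') for f in files),
                sort=False, ignore_index=True).drop_duplicates()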

cond = WIP['종목명']=='KAU21'
WIP.loc[cond,'종가'].plot(title='kau21', figsize=(12,6))

cond = WIP['종목명']=='KAU21'
data = WIP[cond]
fig, ax = plt.subplots(figsize=(20,6))
x = data['일자']
y = data['종가']  # closing price
z2 = data['거래량'].cumsum()  # running total of traded volume
ax.plot(x, y, label='price', lw=6)
ax2 = ax.twinx()  # second y-axis sharing the same x-axis
ax2.bar(x, z2, label='cumulative volume', color='gray', alpha=0.5)
ax.legend(loc='upper left')
ax2.legend(loc='upper right')  # keep the two legends from overlapping
ax.set_ylabel('price')
ax2.set_ylabel('cumulative volume')
ax.set_xlabel('date')
ax.set_title("kau21 price and cumulative volume trend")
ax.grid()
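One caveat with both charts: 일자 is still an integer like 20210104, so matplotlib spaces the points by numeric value, which distorts the gaps around month and year boundaries. A sketch (d2 is a throwaway name) that parses the column into real dates first:

# Parse the integer dates so the x-axis is spaced by calendar time,
# not by the numeric value of YYYYMMDD.
d2 = data.assign(일자=pd.to_datetime(data['일자'], format='%Y%m%d'))
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(d2['일자'], d2['종가'], label='price', lw=6)
ax.set_title('kau21 price (datetime x-axis)')
ax.grid()
fig.autofmt_xdate()  # tilt the date labels for readability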
