2021-03-01 12:19:44 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2021-03-03 04:52:10 +01:00
"[82189:MainThread](2021-03-02 21:02:54,241) INFO - qlib.Initialization - [config.py:276] - default_conf: client.\n",
"[82189:MainThread](2021-03-02 21:02:54,255) WARNING - qlib.Initialization - [config.py:291] - redis connection failed(host=127.0.0.1 port=6379), cache will not be used!\n",
"[82189:MainThread](2021-03-02 21:02:54,828) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.\n",
"[82189:MainThread](2021-03-02 21:02:54,829) INFO - qlib.Initialization - [__init__.py:47] - data_path=/Users/xuanyidong/.qlib/qlib_data/cn_data\n"
2021-03-01 12:19:44 +01:00
]
}
],
"source": [
"import os\n",
"import sys\n",
"import qlib\n",
2021-03-03 04:52:10 +01:00
"import pprint\n",
"import numpy as np\n",
"import pandas as pd\n",
"qlib.init(provider_uri='~/.qlib/qlib_data/cn_data')\n",
"\n",
"from qlib.config import C\n",
"from qlib.data import D\n",
"from qlib.data.data import DatasetD, ExpressionD, Inst, Cal, FeatureD\n",
"from qlib.data.cache import H\n",
"from qlib.data.filter import NameDFilter\n",
"from qlib.utils import code_to_fname, read_bin"
2021-03-01 12:19:44 +01:00
]
},
{
"cell_type": "code",
2021-03-03 04:52:10 +01:00
"execution_count": 3,
2021-03-01 12:19:44 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2021-03-03 04:52:10 +01:00
"<class 'pandas.core.frame.DataFrame'>\n"
2021-03-01 12:19:44 +01:00
]
}
],
"source": [
2021-03-03 04:52:10 +01:00
"# Universe: the csi300 market, filtered by the name regex SH[0-9]{4}55.\n",
"code_filter = NameDFilter(name_rule_re='SH[0-9]{4}55')\n",
"instruments_config = D.instruments(\n",
"    market='csi300',\n",
"    filter_pipe=[code_filter],\n",
")\n",
"# List the instruments of that universe for the 2015-01-01 .. 2016-02-15 window.\n",
"instruments = D.list_instruments(\n",
"    instruments=instruments_config,\n",
"    start_time='2015-01-01',\n",
"    end_time='2016-02-15',\n",
"    as_list=True,\n",
")\n",
"\n",
"# Two raw fields followed by three expression fields.\n",
"fields = [\n",
"    '$close',\n",
"    '$volume',\n",
"    'Ref($close, 1)',\n",
"    'Mean($close, 3)',\n",
"    '$high-$low',\n",
"]\n",
"features = D.features(\n",
"    instruments_config,\n",
"    fields,\n",
"    start_time='2010-01-01',\n",
"    end_time='2017-12-31',\n",
"    freq='day',\n",
")\n",
"print(type(features))"
2021-03-01 12:19:44 +01:00
]
},
{
"cell_type": "code",
2021-03-03 04:52:10 +01:00
"execution_count": 9,
2021-03-01 12:19:44 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2021-03-03 04:52:10 +01:00
" $close $volume Ref($close, 1) Mean($close, 3) \\\n",
"instrument datetime \n",
"SH600655 2010-01-04 8.934296 47799352.0 8.667867 8.691138 \n",
" 2010-01-05 8.889880 29791234.0 8.934296 8.830681 \n",
" 2010-01-06 8.845468 29002874.0 8.889880 8.889881 \n",
" 2010-01-07 8.553690 38189440.0 8.845468 8.763013 \n",
" 2010-01-08 8.645658 23417642.0 8.553690 8.681605 \n",
"... ... ... ... ... \n",
"SH601555 2017-12-25 1.393481 80615584.0 1.406559 1.408012 \n",
" 2017-12-26 1.406559 64259856.0 1.393481 1.402200 \n",
" 2017-12-27 1.400747 58551256.0 1.406559 1.400262 \n",
" 2017-12-28 1.412371 96204872.0 1.400747 1.406559 \n",
" 2017-12-29 1.412371 52801024.0 1.412371 1.408496 \n",
"\n",
" $high-$low \n",
"instrument datetime \n",
"SH600655 2010-01-04 0.412291 \n",
" 2010-01-05 0.203006 \n",
" 2010-01-06 0.250560 \n",
" 2010-01-07 0.412291 \n",
" 2010-01-08 0.275964 \n",
"... ... \n",
"SH601555 2017-12-25 0.020343 \n",
" 2017-12-26 0.018890 \n",
" 2017-12-27 0.017437 \n",
" 2017-12-28 0.045045 \n",
" 2017-12-29 0.013078 \n",
"\n",
"[2867 rows x 5 columns]\n"
2021-03-01 12:19:44 +01:00
]
}
],
"source": [
2021-03-03 04:52:10 +01:00
"print(features)"
2021-03-01 12:19:44 +01:00
]
},
{
"cell_type": "code",
2021-03-03 04:52:10 +01:00
"execution_count": 2,
"metadata": {
"scrolled": true
},
2021-03-01 12:19:44 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2021-03-03 04:52:10 +01:00
"<class 'qlib.data.data.LocalProvider'>\n",
2021-03-01 12:19:44 +01:00
"<class 'qlib.config.QlibConfig'>\n",
"LocalProvider\n",
2021-03-03 04:52:10 +01:00
"Wrapper(provider=<qlib.data.data.LocalProvider object at 0x7ff5601cb370>)\n",
"Wrapper(provider=<qlib.data.data.LocalDatasetProvider object at 0x7ff5601c3b80>)\n",
"<qlib.data.data.LocalDatasetProvider object at 0x7ff5601c3b80>\n",
"LocalDatasetProvider\n",
"--\n",
"Wrapper(provider=<qlib.data.data.LocalInstrumentProvider object at 0x7ff55fb73340>)\n",
"<qlib.data.data.LocalInstrumentProvider object at 0x7ff55fb73340>\n",
"default_disk_cache: 1\n",
"ExpressionD: Wrapper(provider=<qlib.data.data.LocalExpressionProvider object at 0x7ff5601c3bb0>)\n",
"FeatureD : <qlib.data.data.LocalFeatureProvider object at 0x7ff55fb84430>\n"
2021-03-01 12:19:44 +01:00
]
}
],
"source": [
2021-03-03 04:52:10 +01:00
"# Provider:\n",
"print(type(D._provider))\n",
2021-03-01 12:19:44 +01:00
"print(type(C))\n",
"print(C.provider)\n",
2021-03-03 04:52:10 +01:00
"print(D)\n",
"\n",
"# Dataset provider: the DatasetD wrapper, the object on its _provider attribute,\n",
"# and the dataset_provider entry of the config, one per line.\n",
"print(DatasetD, DatasetD._provider, C.dataset_provider, sep='\\n')\n",
"\n",
"print('--')\n",
"# Instrument provider: the Inst wrapper, then the object on its _provider attribute.\n",
"print(Inst, Inst._provider, sep='\\n')\n",
"\n",
"# Default disk cache setting from the config, then the ExpressionD wrapper.\n",
"print('default_disk_cache: {:}'.format(C.default_disk_cache))\n",
"print('ExpressionD: {:}'.format(ExpressionD))\n",
"print('FeatureD : {:}'.format(FeatureD._provider))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'pprint' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-76544a8bb578>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstruments_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0minstruments_d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDatasetD\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_provider\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_instruments_d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstruments_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfreq\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'day'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstruments_d\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'pprint' is not defined"
]
}
],
"source": [
"# Needs `pprint` from the imports cell at the top of the notebook, and\n",
"# `instruments_config` from the cell that builds the features.\n",
"pprint.pprint(instruments_config)\n",
"\n",
"# Hand the same config to the dataset provider's get_instruments_d and\n",
"# pretty-print whatever it returns.\n",
"instruments_d = DatasetD._provider.get_instruments_d(\n",
"    instruments_config,\n",
"    freq='day',\n",
")\n",
"pprint.pprint(instruments_d)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2012-12-31 00:00:00 -> 2019-01-18 00:00:00\n",
"<PandasArray>\n",
"[1.1059314, 1.0935822, 1.1059314, 1.0922102, 1.0839773, 1.0839773, 1.0181155,\n",
" 1.0730004, 1.0867218, 1.068884,\n",
" ...\n",
" 1.1163876, 1.1208236, 1.1119517, 1.0986437, 1.1075157, 1.0971651, 1.1149089,\n",
" 1.083857, 1.083857, 1.0956864]\n",
"Length: 1439, dtype: float32\n"
]
}
],
"source": [
"# Target: one instrument, one field, 'day' frequency.\n",
"instrument = 'SH601555'\n",
"field = '$close'\n",
"freq = 'day'\n",
"\n",
"# The window runs from the first calendar entry returned to the 11th-from-last one.\n",
"trading_days = D.calendar(start_time='2011-12-31', end_time='2019-02-10', freq=freq)\n",
"start_time = trading_days[0]\n",
"end_time = trading_days[-11]\n",
"print(start_time, end_time, sep=' -> ')\n",
"\n",
"# Evaluate the field for that instrument over the window and show the raw values.\n",
"close_series = ExpressionD.expression(instrument, field, start_time, end_time, freq)\n",
"print(close_series.array)\n",
"\n",
"# --- disabled scratch code below, kept unchanged ---\n",
"# expression = ExpressionD.get_expression_instance(field)\n",
"# start_time = pd.Timestamp(start_time)\n",
"# end_time = pd.Timestamp(end_time)\n",
"# _, _, start_index, end_index = Cal.locate_index(start_time, end_time, freq='day', future=False)\n",
"# print(start_index)\n",
"# print(end_index)\n",
"\n",
"# fname = code_to_fname(instrument)\n",
"# uri_data = FeatureD._uri_data.format(instrument.lower(), field[1:], freq)\n",
"# print(uri_data)\n",
"# # series = read_bin(uri_data, start_index, end_index)\n",
"# series = read_bin(uri_data, 2850, 2870)\n",
"# print(series)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wrapper(provider=<qlib.data.data.LocalProvider object at 0x7ff5601cb370>)\n",
"Wrapper(provider=<qlib.data.data.LocalInstrumentProvider object at 0x7ff55fb73340>)\n",
"Wrapper(provider=<qlib.data.data.LocalExpressionProvider object at 0x7ff5601c3bb0>)\n"
]
}
],
"source": [
"from qlib.data import D\n",
"from qlib.data.data import ExpressionD, Inst\n",
"\n",
"# Show the repr of each provider wrapper, one per line.\n",
"print(D, Inst, ExpressionD, sep='\\n')"
2021-03-01 12:19:44 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}