{"id":12103,"date":"2024-12-17T20:11:28","date_gmt":"2024-12-17T11:11:28","guid":{"rendered":"https:\/\/highreso.jp\/edgehub\/?p=12103"},"modified":"2024-12-17T20:11:29","modified_gmt":"2024-12-17T11:11:29","slug":"llamaindexreader","status":"publish","type":"post","link":"https:\/\/highreso.jp\/edgehub\/machinelearning\/llamaindexreader.html","title":{"rendered":"LlamaIndex\u3067\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3001Web\u30da\u30fc\u30b8\u3092\u8aad\u307f\u8fbc\u3080\uff01"},"content":{"rendered":"\n<p>LlamaIndex\u3067\u306f\u3001\u591a\u69d8\u306a\u5f62\u5f0f\u306e\u30c7\u30fc\u30bf\u3092\u3082\u3068\u306bRAG\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p><span class=\"swl-format-2\">\u3053\u306e\u8a18\u4e8b\u3067\u306f\u3001LlamaIndex\u3067\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3001Web\u30da\u30fc\u30b8\u306a\u3069\u306e\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<h2 class=\"wp-block-heading\">LlamaIndex\u3067\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3001Web\u30da\u30fc\u30b8\u3092\u8aad\u307f\u8fbc\u3080<\/h2>\n\n\n\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"600\" height=\"300\" src=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/09\/9_midashi_sf0923.jpg\" alt=\"\u898b\u51fa\u3057\u753b\u50cf\" class=\"wp-image-11920\" srcset=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/09\/9_midashi_sf0923.jpg 600w, https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/09\/9_midashi_sf0923-300x150.jpg 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/figure>\n\n\n\n<p>LlamaIndex\u3067\u306f\u3001\u591a\u69d8\u306a\u30c7\u30fc\u30bf\u5f62\u5f0f\u3092\u3082\u3068\u306bRAG\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p><span 
class=\"swl-format-2\">\u3053\u306e\u8a18\u4e8b\u3067\u306f\u6b21\u306e\u30e2\u30b8\u30e5\u30fc\u30eb\u3092\u4f7f\u3063\u3066\u3001\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3001Web\u30da\u30fc\u30b8\u306e\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<div class=\"wp-block-group is-style-big_icon_good\"><div class=\"wp-block-group__inner-container\">\n<ul class=\"wp-block-list\">\n<li>SimpleDirectoryReader\u3067\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3092\u8aad\u307f\u8fbc\u3080<\/li>\n\n\n\n<li>SimpleWebPageReader\u3067Web\u30da\u30fc\u30b8\u3092\u8aad\u307f\u8fbc\u3080<\/li>\n<\/ul>\n<\/div><\/div>\n\n\n\n<div class=\"p-blogParts post_content\" data-partsID=\"4485\"><p><div class=\"p-adBox -ranking -border-on\" data-id=\"4540\" data-ad=\"ranking\"><div class=\"p-adBox__title -rank1\">50\uff05\u5b89\u3044\u696d\u754c\u6700\u5b89\u7d1aGPU\u30af\u30e9\u30a6\u30c9\u300cGPUSOROBAN\u300d<\/div><div class=\"p-adBox__body\"><div class=\"p-adBox__img\"><div class=\"centerw250\"><a href=\"https:\/\/soroban.highreso.jp\/?utm_medium=edgehub&utm_source=edgehub&utm_campaign=highend-boxtop&adid=edgehub\" rel=\"nofollow\" referrerpolicy=\"no-referrer-when-downgrade\"><img decoding=\"async\" src=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/06\/gpusorobanlllm_banner.jpg\" alt=\"GPUSOROBAN\"><\/a><\/div><\/div><div class=\"p-adBox__details\"><div class=\"p-adBox__star c-reviewStars\"><i class=\"icon-star-full\"><\/i><i class=\"icon-star-full\"><\/i><i class=\"icon-star-full\"><\/i><i class=\"icon-star-full\"><\/i><i class=\"icon-star-full\"><\/i><\/div><div class=\"p-adBox__price u-thin u-fz-s\">GPU\u306e\u30b3\u30b9\u30c8\u3067\u304a\u60a9\u307f\u306e\u65b9\u306b\uff01<\/div><div 
class=\"p-adBox__desc\">AWS\u3068\u6bd4\u3079\u306650%\u4ee5\u4e0a\u5b89\u3044\u6599\u91d1\u3067\u3001\u9ad8\u901fGPU\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u3092\u3054\u5229\u7528\u3044\u305f\u3060\u3051\u307e\u3059\u3002\r\n\u9762\u5012\u306a\u8a2d\u5b9a\u306f\u4e0d\u8981\u3001\u308f\u305a\u304b3\u5206\u3067\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u306e\u7acb\u3061\u4e0a\u3052\u304c\u53ef\u80fd\u3067\u3059\u3002\r\n\u30bf\u30b9\u30af\u306b\u5fdc\u3058\u3066\u6700\u9069\u306aGPU\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u3092\u9078\u629e\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/div><\/div><\/div><div class=\"p-adBox__btns\"><a href=\"https:\/\/soroban.highreso.jp\/compute\/?utm_medium=edgehub&utm_source=edgehub&utm_campaign=boxtop&adid=edgehub\" class=\"p-adBox__btn -btn1\" target=\"_blank\" rel=\"noopener nofollow\">\u516c\u5f0f\u30b5\u30a4\u30c8\u3092\u898b\u308b<\/a><a href=\"https:\/\/soroban.highreso.jp\/download\/compute-catalog\/?utm_medium=edgehub&utm_source=edgehub&utm_campaign=boxtop&adid=edgehub\" class=\"p-adBox__btn -btn2\" target=\"_blank\" rel=\"noopener nofollow\">\u8cc7\u6599\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9<\/a><\/div><\/div><\/p>\n<\/div>\n\n\n\n<h2 class=\"wp-block-heading\">\u300cSimpleDirectoryReader\u300d\u3067\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3092\u8aad\u307f\u8fbc\u3080<\/h2>\n\n\n\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"600\" height=\"300\" src=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/09\/36_midashi_0921.jpg\" alt=\"\u898b\u51fa\u3057\u753b\u50cf\" class=\"wp-image-11872\" srcset=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/09\/36_midashi_0921.jpg 600w, https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/09\/36_midashi_0921-300x150.jpg 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/h3>\n\n\n\n<p><span 
class=\"swl-format-2\">\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u306e\u8aad\u307f\u8fbc\u307f\u306b\u306f\u3001LlamaIndex\u306e\u300cSimpleDirectoryReader\u300d\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<p>\u5fc5\u8981\u306a\u30d1\u30c3\u30b1\u30fc\u30b8\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>pip install llama-index llama-index-core<\/code><\/pre>\n\n\n\n<p>\u5fc5\u8981\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30dd\u30fc\u30c8\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>from llama_index.core import SimpleDirectoryReader<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">\u30c6\u30ad\u30b9\u30c8\u30d5\u30a1\u30a4\u30eb\u306e\u8aad\u307f\u8fbc\u307f<\/h3>\n\n\n\n<p><span class=\"swl-format-2\">SimpleDirectoryReader\u3092\u4f7f\u3063\u3066\u30c6\u30ad\u30b9\u30c8\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>file_path = \"\u4ee4\u548c5\u5e74\u5ea6_\u653f\u5e9c\u304c\u8b1b\u3058\u305f\u3053\u3069\u3082\u65bd\u7b56\u306e\u5b9f\u65bd\u72b6\u6cc1.txt\"\n\nreader = SimpleDirectoryReader(input_files=&#091;file_path])\ndocuments = reader.load_data()\nprint(repr(documents&#091;0])&#091;:1000]) <\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code has-swl-gray-background-color has-background\"><code>Document(id_='141d9b36-c23d-4ab0-a746-18d70b175097', embedding=None, metadata={'file_path': '\u4ee4\u548c5\u5e74\u5ea6_\u653f\u5e9c\u304c\u8b1b\u3058\u305f\u3053\u3069\u3082\u65bd\u7b56\u306e\u5b9f\u65bd\u72b6\u6cc1.txt', 'file_name': '\u4ee4\u548c5\u5e74\u5ea6_\u653f\u5e9c\u304c\u8b1b\u3058\u305f\u3053\u3069\u3082\u65bd\u7b56\u306e\u5b9f\u65bd\u72b6\u6cc1.txt', 'file_type': 'text\/plain', 'file_size': 77563, 'creation_date': '2024-09-28', 
'last_modified_date': '2024-09-28'}, excluded_embed_metadata_keys=&#091;'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=&#091;'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='\u6211\u304c\u56fd\u306b\u304a\u3051\u308b\u3053\u3069\u3082\u3092\u3081\u3050\u308b\u72b6\u6cc1\\r\\n\u7b2c\uff11\u90e8\\r\\n\u25cb\u51fa\u751f\\r\\n2022\u5e74\u306e\u51fa\u751f\u6570\u306f77\u4e07759\u4eba\u3067\u3001\u7d71\u8a08\u3092\u958b\u59cb\u3057\u305f1899\u5e74\u4ee5\u6765\u3001\u6700\u5c11\u306e\u6570\u5b57\u3068\u306a\u308a\u300180\\r\\n\u4e07\u4eba\u3092\u5272\u3063\u305f\u3002\u7b2c\uff11\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\u30e0\u671f\uff081947\uff5e1949\u5e74\uff09\u306b\u306f\u7d04270\u4e07\u4eba\u3001\u7b2c\uff12\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\\r\\n\u30e0\u671f\uff081971\uff5e1974\u5e74\uff09\u306b\u306f\u7d04210\u4e07\u4eba\u3067\u3042\u3063\u305f\u304c\u3001\u305d\u306e\u5f8c\u6e1b\u5c11\u3092\u7d9a\u3051\u3001\u3053\u3069\u3082\u306e\u6570\u306f\u30d4\u30fc\\r\\n\u30af\u306e\uff13\u5206\u306e\uff11\u4ee5\u4e0b\u306b\u307e\u3067\u6e1b\u5c11\u3057\u305f\u3002\\r\\n\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387\u306e\u63a8\u79fb\u3092\u898b\u308b\u3068\u3001\u7b2c\uff11\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\u30e0\u671f\u306b\u306f4.3\u3092\u8d85\u3048\u3066\u3044\u305f\u304c\u3001\u7b2c\uff12\u6b21\u30d9\\r\\n\u30d3\u30fc\u30d6\u30fc\u30e0\u671f\u306b\u306f\u7d042.1\u307e\u3067\u4f4e\u4e0b\u30012005\u5e74\u306b\u306f1.26\u307e\u3067\u843d\u3061\u8fbc\u307f\u3001\u305d\u306e\u5f8c\u30012015\u5e74\u306b\u306f\\r\\n1.45\u307e\u3067\u56de\u5fa9\u3057\u305f\u3082\u306e\u306e\u30012022\u5e74\u306b\u306f1.26\u3068\u904e\u53bb\u6700\u4f4e\u3068\u306a\u3063\u305f\u3002\\r\\n\u56f3\u88681-1-1 
\u51fa\u751f\u6570\u3068\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387\u306e\u63a8\u79fb\\r\\n\u51fa\\r\\n\u751f\\r\\n\u6570\\r\\n\u51fa\u751f\u6570\\r\\n\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387\\r\\n1989 \u5e74\\r\\n\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387 1.57\\r\\n\u7b2c\uff11\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\u30e0\uff081947<\/code><\/pre>\n\n\n\n<div class=\"swell-block-capbox cap_box is-style-onborder_ttl2\" data-colset=\"col3\"><div class=\"cap_box_ttl\"><span>\u30b3\u30fc\u30c9\u306e\u8aac\u660e<\/span><\/div><div class=\"cap_box_content\">\n<p><span class=\"swl-format-2\">reader = SimpleDirectoryReader(input_files=[file_path])<\/span><\/p>\n\n\n\n<p><code>input_files<\/code>\u306b\u3001\u8aad\u307f\u8fbc\u3080\u30d5\u30a1\u30a4\u30eb\u306e\u30d1\u30b9\u3092\u6e21\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p><code>SimpleDirectoryReader<\/code>\u30af\u30e9\u30b9\u306e\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u3092\u4f5c\u6210\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p><span class=\"swl-format-2\">documents = reader.load_data()<\/span><\/p>\n\n\n\n<p><code>load_data()<\/code>\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u6307\u5b9a\u3057\u305f\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002<\/p>\n<\/div><\/div>\n\n\n\n<h3 class=\"wp-block-heading\">PDF\u306e\u8aad\u307f\u8fbc\u307f<\/h3>\n\n\n\n<p><span class=\"swl-format-2\">SimpleDirectoryReader\u3092\u4f7f\u3063\u3066PDF\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>file_path2 = \"\u4ee4\u548c6\u5e74\u7248_\u539a\u751f\u52b4\u50cd\u767d\u66f8\u6982\u8981.pdf\"\n\nreader = SimpleDirectoryReader(input_files=&#091;file_path2])\ndocuments = reader.load_data()\nprint(repr(documents&#091;0])&#091;:1000]) <\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code has-swl-gray-background-color has-background\"><code>Document(id_='141d9b36-c23d-4ab0-a746-18d70b175097', 
embedding=None, metadata={'file_path': '\u4ee4\u548c5\u5e74\u5ea6_\u653f\u5e9c\u304c\u8b1b\u3058\u305f\u3053\u3069\u3082\u65bd\u7b56\u306e\u5b9f\u65bd\u72b6\u6cc1.txt', 'file_name': '\u4ee4\u548c5\u5e74\u5ea6_\u653f\u5e9c\u304c\u8b1b\u3058\u305f\u3053\u3069\u3082\u65bd\u7b56\u306e\u5b9f\u65bd\u72b6\u6cc1.txt', 'file_type': 'text\/plain', 'file_size': 77563, 'creation_date': '2024-09-28', 'last_modified_date': '2024-09-28'}, excluded_embed_metadata_keys=&#091;'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=&#091;'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='\u6211\u304c\u56fd\u306b\u304a\u3051\u308b\u3053\u3069\u3082\u3092\u3081\u3050\u308b\u72b6\u6cc1\\r\\n\u7b2c\uff11\u90e8\\r\\n\u25cb\u51fa\u751f\\r\\n2022\u5e74\u306e\u51fa\u751f\u6570\u306f77\u4e07759\u4eba\u3067\u3001\u7d71\u8a08\u3092\u958b\u59cb\u3057\u305f1899\u5e74\u4ee5\u6765\u3001\u6700\u5c11\u306e\u6570\u5b57\u3068\u306a\u308a\u300180\\r\\n\u4e07\u4eba\u3092\u5272\u3063\u305f\u3002\u7b2c\uff11\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\u30e0\u671f\uff081947\uff5e1949\u5e74\uff09\u306b\u306f\u7d04270\u4e07\u4eba\u3001\u7b2c\uff12\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\\r\\n\u30e0\u671f\uff081971\uff5e1974\u5e74\uff09\u306b\u306f\u7d04210\u4e07\u4eba\u3067\u3042\u3063\u305f\u304c\u3001\u305d\u306e\u5f8c\u6e1b\u5c11\u3092\u7d9a\u3051\u3001\u3053\u3069\u3082\u306e\u6570\u306f\u30d4\u30fc\\r\\n\u30af\u306e\uff13\u5206\u306e\uff11\u4ee5\u4e0b\u306b\u307e\u3067\u6e1b\u5c11\u3057\u305f\u3002\\r\\n\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387\u306e\u63a8\u79fb\u3092\u898b\u308b\u3068\u3001\u7b2c\uff11\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\u30e0\u671f\u306b\u306f4.3\u3092\u8d85\u3048\u3066\u3044\u305f\u304c\u3001\u7b2c\uff12\u6b21\u30d9\\r\\n\u30d3\u30fc\u30d6\u30fc\u30e0\u671f\u306b\u306f\u7d042.1\u307e\u3067\u4f4e\u4e0b\u30012005\u5e74\u306b\u306f1.26\u307e\u3067\u843d\u3
061\u8fbc\u307f\u3001\u305d\u306e\u5f8c\u30012015\u5e74\u306b\u306f\\r\\n1.45\u307e\u3067\u56de\u5fa9\u3057\u305f\u3082\u306e\u306e\u30012022\u5e74\u306b\u306f1.26\u3068\u904e\u53bb\u6700\u4f4e\u3068\u306a\u3063\u305f\u3002\\r\\n\u56f3\u88681-1-1 \u51fa\u751f\u6570\u3068\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387\u306e\u63a8\u79fb\\r\\n\u51fa\\r\\n\u751f\\r\\n\u6570\\r\\n\u51fa\u751f\u6570\\r\\n\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387\\r\\n1989 \u5e74\\r\\n\u5408\u8a08\u7279\u6b8a\u51fa\u751f\u7387 1.57\\r\\n\u7b2c\uff11\u6b21\u30d9\u30d3\u30fc\u30d6\u30fc\u30e0\uff081947<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">CSV\u306e\u8aad\u307f\u8fbc\u307f<\/h3>\n\n\n\n<p><span class=\"swl-format-2\">SimpleDirectoryReader\u3092\u4f7f\u3063\u3066CSV\u3092\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>file_path3 = \"\u4ee4\u548c5\u5e746\u6708_\u793e\u4f1a\u533b\u7642\u8a3a\u7642\u884c\u70ba\u5225\u7d71\u8a08.csv\"\n\nreader = SimpleDirectoryReader(input_files=&#091;file_path3])\ndocuments = reader.load_data()\nprint(repr(documents&#091;0])&#091;:1000]) <\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code has-swl-gray-background-color has-background\"><code>Document(id_='16a8a52c-1074-483b-9709-1de5d201731a', embedding=None, metadata={'file_path': '\u4ee4\u548c5\u5e746\u6708_\u793e\u4f1a\u533b\u7642\u8a3a\u7642\u884c\u70ba\u5225\u7d71\u8a08.csv', 'file_name': '\u4ee4\u548c5\u5e746\u6708_\u793e\u4f1a\u533b\u7642\u8a3a\u7642\u884c\u70ba\u5225\u7d71\u8a08.csv', 'file_type': 'text\/csv', 'file_size': 2735096, 'creation_date': '2024-09-28', 'last_modified_date': '2024-09-28'}, excluded_embed_metadata_keys=&#091;'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=&#091;'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, 
text='\u8a3a\u7642\u884c\u70ba\u306e\u72b6\u6cc1\\u3000\u533b\u79d1\u8a3a\u7642\\u3000\u7b2c\uff12\uff12\u8868\\u3000\u533b\u79d1\u8a3a\u7642\uff08\u7dcf\u6570\uff0d\uff11\u7dcf\u6570\uff09\\u3000\u5b9f\u65bd\u4ef6\u6570\u30fb\u56de\u6570\u30fb\u70b9\u6570\uff0c\u4e00\u822c\u533b\u7642\uff0d\u5f8c\u671f\u533b\u7642\u3001\u8a3a\u7642\u884c\u70ba\uff08\u7d30\u5206\u985e\uff09\u3001\u75c5\u9662\uff08\u7a2e\u985e\u5225\uff09\uff0d\u8a3a\u7642\u6240\uff08\u6709\u5e8a\uff0d\u7121\u5e8a\uff09\u5225, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan\\n\u6ce8\uff1a\uff11\uff09\u8868\u5074\u6b04\u5185\u306e\u6570\u5b57\u306f\u56fa\u5b9a\u70b9\u6570\u3067\u3042\u308b\u3002, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,<\/code><\/pre>\n\n\n\n<p class=\"is-style-icon_good\">\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3092\u8aad\u307f\u8fbc\u3080\u3053\u3068\u304c\u3067\u304d\u307e\u3057\u305f\uff01<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u300cSimpleWebPageReader\u300d\u3067Web\u30da\u30fc\u30b8\u3092\u8aad\u307f\u8fbc\u3080<\/h2>\n\n\n\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"600\" height=\"300\" src=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/08\/swallow1_midashi.jpg\" alt=\"\u898b\u51fa\u3057\u753b\u50cf\" class=\"wp-image-9570\" srcset=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/08\/swallow1_midashi.jpg 600w, https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/08\/swallow1_midashi-300x150.jpg 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/h3>\n\n\n\n<p><span 
class=\"swl-format-2\">Web\u30da\u30fc\u30b8\u306e\u8aad\u307f\u8fbc\u307f\u306b\u306f\u3001LlamaIndex\u306e\u300cSimpleWebPageReader\u300d\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<\/span><\/p>\n\n\n\n<p>\u5fc5\u8981\u306a\u30d1\u30c3\u30b1\u30fc\u30b8\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>pip install llama-index llama-index-readers-web<\/code><\/pre>\n\n\n\n<p>\u5fc5\u8981\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30dd\u30fc\u30c8\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>from llama_index.readers.web import SimpleWebPageReader<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">Web\u30da\u30fc\u30b8\u306e\u8aad\u307f\u8fbc\u307f<\/h3>\n\n\n\n<pre class=\"wp-block-code has-black-background-color has-background\"><code>urls = \"https:\/\/ja.wikipedia.org\/wiki\/%E8%8F%AF%E6%97%8F\"\n\ndocuments = SimpleWebPageReader(html_to_text=True).load_data(&#091;urls])\nprint(repr(documents&#091;0])&#091;10000:11000]) <\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code has-swl-gray-background-color has-background\"><code>\/%E9%AB%98%E6%9C%A8%E5%85%AB%E5%B0%BA\\n\"\u9ad8\u6728\u516b\u5c3a\")\u3001\u9577\u7537\u91d1\u6a39\u3002\\n\\n&#091;\u7248\u7c4d\u5949\u9084](\/wiki\/%E7%89%88%E7%B1%8D%E5%A5%89%E9%82%84\\n\"\u7248\u7c4d\u5949\u9084\")\u304c\u884c\u308f\u308c\u305f\u660e\u6cbb2\u5e74&#091;6\u670817\u65e5](\/wiki\/6%E6%9C%8817%E6%97%A5_\\\\(%E6%97%A7%E6%9A%A6\\\\)\\n\"6\u670817\u65e5 
\\\\(\u65e7\u66a6\\\\)\")\uff08&#091;1869\u5e74](\/wiki\/1869%E5%B9%B4\\n\"1869\u5e74\")&#091;7\u670825\u65e5](\/wiki\/7%E6%9C%8825%E6%97%A5\\n\"7\u670825\u65e5\")\uff09\u306e\u884c\u653f\u5b98\u9054\u7b2c\u4e94\u56db\u4e8c\u53f7\u3067&#091;\u516c\u537f](\/wiki\/%E5%85%AC%E5%8D%BF\\n\"\u516c\u537f\")\uff08&#091;\u516c\u5bb6](\/wiki\/%E5%85%AC%E5%AE%B6\\n\"\u516c\u5bb6\")\u306e&#091;\u5802\u4e0a\u5bb6](\/wiki\/%E5%A0%82%E4%B8%8A%E5%AE%B6\\n\"\u5802\u4e0a\u5bb6\")\uff09\u3068&#091;\u8af8\u4faf](\/wiki\/%E8%AB%B8%E4%BE%AF \"\u8af8\u4faf\")\uff08&#091;\u5927\u540d](\/wiki\/%E5%A4%A7%E5%90%8D\\n\"\u5927\u540d\")\uff09\u306e\u79f0\u304c\u5ec3\u3055\u308c\u3001\u83ef\u65cf\u3068\u6539\u3081\u3089\u308c\u305f&#091;1]&#091;2]\u3002\u3053\u306e\u6642\u4ee5\u964d&#091;\u83ef\u65cf\u4ee4](\/wiki\/%E8%8F%AF%E6%97%8F%E4%BB%A4\\n\"\u83ef\u65cf\u4ee4\")\u5236\u5b9a\u4ee5\u524d\u306b\u83ef\u65cf\u306b\u5217\u3057\u305f\u5bb6\u3092\u300c\u65e7\u83ef\u65cf\u300d\u3068\u547c\u3076\u3053\u3068\u304c\u3042\u3063\u305f&#091;3]&#091;4]\u3002\u307e\u305f\u65e7\u516c\u5bb6\u306e\u83ef\u65cf\u306f\u300c\u5802\u4e0a\u83ef\u65cf\u300d&#091;5]\u3001\u65e7\u5927\u540d\u306e\u83ef\u65cf\u306f\u300c\u5927\u540d\u83ef\u65cf\u300d\u3068\u547c\u3076\u3053\u3068\u3082\u3042\u3063\u305f&#091;6]\u3002\\n\\n\u65e7\u83ef\u65cf\u6642\u4ee3\u306b\u306f\u7235\u4f4d\u306f\u5b58\u5728\u305b\u305a\u3001\u4e16\u8972\u5236\u306e\u6c38\u4e16\u83ef\u65cf\u3068\u4e00\u4ee3\u9650\u308a\u306e&#091;\u7d42\u8eab\u83ef\u65cf](\/wiki\/%E7%B5%82%E8%BA%AB%E8%8F%AF%E6%97%8F\\n\"\u7d42\u8eab\u83ef\u65cf\")\u306e\u5225\u304c\u3042\u3063\u305f\u304c&#091;3]\u3001\u660e\u6cbb17\u5e747\u67087\u65e5\u306b\u516c\u5e03\u3055\u308c\u305f&#091;\u83ef\u65cf\u4ee4](\/wiki\/%E8%8F%AF%E6%97%8F%E4%BB%A4\\n\"\u83ef\u65cf\u4ee4\")\u306b\u3088\u308a&#091;\u516c\u7235](\/wiki\/%E5%85%AC%E7%88%B5 \"\u516c\u7235\")\u3001&#091;\u4faf\u7235](\/wiki\/%E4%BE%AF%E7%88%B5\\n\"\u4faf\u7235\")\u3001&#091;\u4f2f\u7235](\/wiki\/%E4%BC%AF%E7%88%B5 
\"\u4f2f\u7235\")\u3001&#091;\u5b50\u7235](\/wiki\/%E5%AD%90%E7%88%B5\\n\"\u5b50\u7235\")\u3001&#091;\u7537\u7235](\/wiki\/%E7%94%B7%E7%88%B5\\n\"\u7537\u7235\")\u306e\u4e94\u7235\u5236\u304c<\/code><\/pre>\n\n\n\n<div class=\"swell-block-capbox cap_box is-style-onborder_ttl2\" data-colset=\"col3\"><div class=\"cap_box_ttl\"><span>\u30b3\u30fc\u30c9\u306e\u8aac\u660e<\/span><\/div><div class=\"cap_box_content\">\n<p><span class=\"swl-format-2\">documents = SimpleWebPageReader(html_to_text=True).load_data([urls])<\/span><\/p>\n\n\n\n<p><code>SimpleWebPageReader<\/code>\u306f\u30a6\u30a7\u30d6\u30da\u30fc\u30b8\u3092\u8aad\u307f\u8fbc\u3080\u305f\u3081\u306e\u30af\u30e9\u30b9\u3067\u3059\u3002\u6307\u5b9a\u3057\u305fURL\u304b\u3089HTML\u30c7\u30fc\u30bf\u3092\u53d6\u5f97\u3057\u3001\u305d\u308c\u3092\u30c6\u30ad\u30b9\u30c8\u5f62\u5f0f\u306b\u5909\u63db\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p><code>html_to_text=True<\/code>\u306f\u3001\u8aad\u307f\u8fbc\u3093\u3060HTML\u30c7\u30fc\u30bf\u3092\u30c6\u30ad\u30b9\u30c8\u306b\u5909\u63db\u3059\u308b\u30aa\u30d7\u30b7\u30e7\u30f3\u3067\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001HTML\u30bf\u30b0\u3092\u53d6\u308a\u9664\u304d\u3001\u7d14\u7c8b\u306a\u30c6\u30ad\u30b9\u30c8\u306e\u307f\u304c\u5f97\u3089\u308c\u307e\u3059\u3002<\/p>\n<\/div><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"index_id8\">\u751f\u6210AI\u30fbLLM\u306e\u30b3\u30b9\u30c8\u3067\u304a\u56f0\u308a\u306a\u3089<\/h2>\n\n\n\n<p>GPU\u306e\u30b9\u30da\u30c3\u30af\u4e0d\u8db3\u3067\u751f\u6210AI\u306e\u958b\u767a\u304c\u601d\u3046\u3088\u3046\u306b\u9032\u307e\u306a\u3044\u3053\u3068\u306f\u3042\u308a\u307e\u305b\u3093\u304b\uff1f<\/p>\n\n\n\n<p>\u305d\u3093\u306a\u3068\u304d\u306b\u306f\u3001<strong><span 
class=\"swl-format-2\">\u9ad8\u6027\u80fd\u306aGPU\u3092\u30ea\u30fc\u30ba\u30ca\u30d6\u30eb\u306a\u4fa1\u683c\u3067\u4f7f\u3048\u308bGPU\u30af\u30e9\u30a6\u30c9\u30b5\u30fc\u30d3\u30b9\u304c\u304a\u3059\u3059\u3081\u3067\u3059\uff01<\/span><\/strong><\/p>\n\n\n\n<div class=\"p-blogParts post_content\" data-partsID=\"4543\">\n<div class=\"swell-block-capbox cap_box\" data-colset=\"col2\"><div class=\"cap_box_ttl\"><span>GPUSOROBAN<\/span><\/div><div class=\"cap_box_content\">\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"800\" height=\"283\" src=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/06\/gpusorobanpr_1.jpg\" alt=\"GPUSOROBAN\" class=\"wp-image-4552\" srcset=\"https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/06\/gpusorobanpr_1.jpg 800w, https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/06\/gpusorobanpr_1-300x106.jpg 300w, https:\/\/highreso.jp\/edgehub\/wp-content\/uploads\/2024\/06\/gpusorobanpr_1-768x272.jpg 768w\" sizes=\"(max-width: 800px) 100vw, 800px\" \/><\/figure>\n\n\n\n<p>GPUSOROBAN\u306f\u3001<span class=\"swl-format-1\"><span class=\"swl-marker mark_yellow\">\u751f\u6210AI\u30fbLLM\u5411\u3051\u306e\u9ad8\u901fGPU\u3092\u696d\u754c\u6700\u5b89\u7d1a\u306e\u6599\u91d1\u3067\u4f7f\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/span><\/span><\/p>\n\n\n\n<p>\u30a4\u30f3\u30bf\u30fc\u30cd\u30c3\u30c8\u74b0\u5883\u3055\u3048\u3042\u308c\u3070\u3001\u30af\u30e9\u30a6\u30c9\u74b0\u5883\u306eGPU\u30b5\u30fc\u30d0\u30fc\u3092\u3059\u3050\u306b\u5229\u7528\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u5927\u898f\u6a21\u306a\u8a2d\u5099\u6295\u8cc7\u306e\u5fc5\u8981\u304c\u306a\u304f\u3001\u7169\u96d1\u306a\u30b5\u30fc\u30d0\u30fc\u7ba1\u7406\u304b\u3089\u3082\u89e3\u653e\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"swell-block-columns\"><div class=\"swell-block-columns__inner\">\n<div class=\"swell-block-column swl-has-mb--s\">\n<div class=\"swell-block-button 
blue_ -size-l is-style-btn_normal\"><a href=\"https:\/\/soroban.highreso.jp\/?utm_medium=edgehub&amp;utm_source=edgehub&amp;utm_campaign=highend_boxa&amp;adid=edgehub\" class=\"swell-block-button__link\"><span>\u516c\u5f0f\u30b5\u30a4\u30c8\u3092\u307f\u308b<\/span><\/a><\/div>\n<\/div>\n\n\n\n<div class=\"swell-block-column swl-has-mb--s\">\n<div class=\"swell-block-button red_ -size-l is-style-btn_normal\"><a href=\"https:\/\/soroban.highreso.jp\/download\/compute-catalog\/?utm_medium=edgehub&amp;utm_source=edgehub&amp;utm_campaign=highend_boxa&amp;adid=edgehub\" class=\"swell-block-button__link\"><span>\u8cc7\u6599\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9<\/span><\/a><\/div>\n<\/div>\n<\/div><\/div>\n<\/div><\/div>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>LlamaIndex\u3067\u306f\u3001\u591a\u69d8\u306a\u5f62\u5f0f\u306e\u30c7\u30fc\u30bf\u3092\u3082\u3068\u306bRAG\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002 \u3053\u306e\u8a18\u4e8b\u3067\u306f\u3001LlamaIndex\u3067\u30c6\u30ad\u30b9\u30c8\u30fbPDF\u30fbCSV\u3001Web\u30da\u30fc\u30b8\u306a\u3069\u306e\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002 LlamaIndex 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":12141,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"swell_btn_cv_data":"","footnotes":""},"categories":[20],"tags":[],"class_list":["post-12103","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-machinelearning"],"_links":{"self":[{"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/posts\/12103","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/comments?post=12103"}],"version-history":[{"count":3,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/posts\/12103\/revisions"}],"predecessor-version":[{"id":13326,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/posts\/12103\/revisions\/13326"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/media\/12141"}],"wp:attachment":[{"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/media?parent=12103"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/categories?post=12103"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/highreso.jp\/edgehub\/wp-json\/wp\/v2\/tags?post=12103"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}