{"id":826,"date":"2024-11-27T10:39:47","date_gmt":"2024-11-27T02:39:47","guid":{"rendered":"https:\/\/linguopeng.top\/?p=826"},"modified":"2024-11-27T10:52:26","modified_gmt":"2024-11-27T02:52:26","slug":"hmdb%e7%88%ac%e8%99%ab","status":"publish","type":"post","link":"https:\/\/linguopeng.top\/?p=826","title":{"rendered":"HMDB\u722c\u866b"},"content":{"rendered":"\n<pre class=\"wp-block-code\"><code>#\u6765\u6e90\u4e8ehttps:\/\/mp.weixin.qq.com\/s\/u1XxeOs9gtVYtURD4dGB7Q\nlibrary(XML)\nlibrary(httr) \nlibrary(magrittr)\nlibrary(rvest)\nlibrary(xml2)\nlibrary(stringr)\nlibrary(tidyverse)\n#devtools::install_github(\"ChristianAmes\/HMDBScraper\")\nlibrary(HMDBScraper)#\u5305\u5728github\u4e0a\uff0c\u6ce8\u610f\u5b89\u88c5\u65b9\u5f0f\n# web &lt;- read_html(\"https:\/\/hmdb.ca\/bmi_metabolomics\")\n# pag1 &lt;- web %>%\n#   html_table() %>%.&#91;&#91;1]]\n# pag1\ndir.create(\".\/HMDB\u722c\u866b\/\")\n#--\u7b2c\u4e00\u90e8\u5206\uff1abmi\u4ee3\u8c22\u79df\u6570\u636e#-----------\n# 294\u662f\u5728\u7f51\u9875\u76f4\u63a5\u770b\u5230\u7684\u603b\u6761\u6570\uff0c25\u662f\u6bcf\u9875\u663e\u793a\u7684\u6761\u6570\u3002\npages = 1:ceiling(294 \/ 25)\nurl &lt;- \"http:\/\/www.hmdb.ca\/bmi_metabolomics?page=\"\nurl_all &lt;- paste(url, pages, sep=\"\")\nfor (i in pages) {\n  tem = url_all&#91;i] %>% \n    read_html %>%\n    html_table() %>%.&#91;&#91;1]]\n  if (i == 1) {\n    dat = tem\n  } else {\n    dat = rbind(dat,tem)\n  }\n}\nhead(dat)\nwrite.csv(dat,\".\/HMDB\u722c\u866b\/bim_HMDB.csv\")\n#--\u7b2c\u4e8c\u90e8\u5206\uff1aage\u4ee3\u8c22\u79df\u6570\u636e#-----------\npages = 1:ceiling(418 \/ 25)\nurl &lt;- \"https:\/\/hmdb.ca\/age_metabolomics?page=\"\nurl_all &lt;- paste(url, pages, sep=\"\")\nfor (i in pages) {\n  tem = url_all&#91;i] %>% \n    read_html %>%\n    html_table() %>%.&#91;&#91;1]]\n  \n  if (i == 1) {\n    dat1 = tem\n  } else {\n    dat1 = rbind(dat1,tem)\n  }\n}\ndim(dat1)\nhead(dat1)\nwrite.csv(dat1,\".\/HMDB\u722c\u866b\/age_HMDB.csv\")\n#--\u7b2c\u4e09\u90e8\u5206\uff1agender\u4ee3\u8c22\u79df\u6570\u636e#-----------\npages = 1:ceiling(515 \/ 25)\nurl &lt;- \"https:\/\/hmdb.ca\/gender_metabolomics?page=\"\nurl_all &lt;- paste(url, pages, sep=\"\")\n\nfor (i in pages) {\n  tem = url_all&#91;i] %>% \n    read_html %>%\n    html_table() %>%.&#91;&#91;1]]\n  \n  if (i == 1) {\n    dat2 = tem\n  } else {\n    dat2 = rbind(dat2,tem)\n  }\n}\ndim(dat2)\nhead(dat2)\nwrite.csv(dat2,\".\/HMDB\u722c\u866b\/gender_HMDB.csv\")\n#--\u7b2c\u56db\u90e8\u5206\uff1ageno\u4ee3\u8c22\u79df\u6570\u636e#-----------\npages = 1:ceiling(6777 \/ 25)\nurl &lt;- \"https:\/\/hmdb.ca\/geno_metabolomics?page=\"\nurl_all &lt;- paste(url, pages, sep=\"\")\n\nfor (i in pages) {\n  tem = url_all&#91;i] %>% \n    read_html %>%\n    html_table() %>%.&#91;&#91;1]]\n  \n  if (i == 1) {\n    dat3 = tem\n  } else {\n    dat3 = rbind(dat3,tem)\n  }\n}\nhead(dat3)\ndim(dat3)\nwrite.csv(dat3,\".\/HMDB\u722c\u866b\/geno_HMDB.csv\")\n#--\u7b2c\u4e94\u90e8\u5206\uff1apharmaco\u4ee3\u8c22\u79df\u6570\u636e#-----------\npages = 1:ceiling(2497 \/ 25)\nurl &lt;- \"https:\/\/hmdb.ca\/pharmaco_metabolomics?page=\"\nurl_all &lt;- paste(url, pages, sep=\"\")\n\nfor (i in pages) {\n  tem = url_all&#91;i] %>% \n    read_html %>%\n    html_table() %>%.&#91;&#91;1]]\n  \n  if (i == 1) {\n    dat4 = tem\n  } else {\n    dat4 = rbind(dat4,tem)\n  }\n}\nhead(dat4)\ndim(dat4)\nwrite.csv(dat4,\".\/HMDB\u722c\u866b\/pharmaco_HMDB.csv\")\n#--\u7b2c\u516d\u90e8\u5206\uff1adiurnal\u4ee3\u8c22\u79df\u6570\u636e#-----------\npages = 1:ceiling(2315 \/ 25)\nurl &lt;- \"https:\/\/hmdb.ca\/pharmaco_metabolomics?page=\"\nurl_all &lt;- paste(url, pages, sep=\"\")\n\nfor (i in pages) {\n  tem = url_all&#91;i] %>% \n    read_html %>%\n    html_table() %>%.&#91;&#91;1]]\n  \n  if (i == 1) {\n    dat5 = tem\n  } else {\n    dat5 = rbind(dat5,tem)\n  }\n}\nhead(dat5)\ndim(dat5)\nwrite.csv(dat5,\".\/HMDB\u722c\u866b\/diurnal_HMDB.csv\")\n############\u5408\u5e76\u6570\u636e\ndatall = data.frame(ID = c(dat$Metabolite,\n                           dat1$Metabolite,\n                           dat2$Metabolite,\n                           dat3$Metabolite,\n                           dat4$Metabolite,\n                           dat5$Metabolite))\ntail(datall,20)\ndatall$metabolite =  \n  sapply(strsplit(datall$ID, \"&#91;(]HMDB\"), `&#91;`, 1)\ndatall$HMDB_ID =  \n  sapply(strsplit(datall$ID, \"&#91;(]HMDB\"), `&#91;`, 2)\ndatall$HMDB_ID = gsub(\")\",\"\",datall$HMDB_ID)\ndatall$HMDB_ID = paste(\"HMDB\",datall$HMDB_ID,sep = \"\")\n\nwrite.csv(datall,\".\/HMDB\u722c\u866b\/all_HMDB_ID_metabolites.csv\")\n\n\n#--\u83b7\u53d6\u4ee3\u8c22\u7269\u5168\u90e8\u4fe1\u606f\nget.entry&lt;- function(id, prefix= \"http:\/\/www.hmdb.ca\/metabolites\/\",check_availability=T){\n  if(check_availability){\n    if(!check.availability(id)){ stop(paste(id, \" id could not be found\"))}\n  }\n  #create link\n  link&lt;- paste(prefix,id,\".xml\",sep= \"\")\n  #download data\n  txt&lt;- try(readLines(link))\n  #error handling code, just try another 3 times\n  i&lt;-1\n  while(inherits(txt, \"try-error\"))\n  {\n    print(paste(\"Retrieving has failed for \",i,\". time\",sep=\"\"))\n    txt&lt;- try(readLines(link))\n    i= i+1\n    if(i>3) stop(\"Could not find Server or ID\")\n  }\n  #process data to be usable\n  data&lt;- XML::xmlTreeParse(txt,asText= T)\n  data&lt;- XML::xmlToList(data)\n  return (data)\n}\ntem = read.csv(\".\/HMDB\u722c\u866b\/all_HMDB_ID_metabolites.csv\",row.names = 1)\nhead(tem)\nid0 = tem$HMDB_ID\nA = c()\nB = c()\nC = c()\nD = c()\nE = c()\nG = c()\n#for (i in 373:length(id0)) {\nfor (i in 1:2) {\n  dat = try(get.entry(id = id0&#91;i]) , silent = FALSE)\n  # \u63d0\u53d6kegg id\n  if (!is.null(dat$kegg_id)) {\n    A&#91;i] = dat$kegg_id\n  } else{\n    A&#91;i] = \"\"\n  }\n  \n  if (!is.null(dat$taxonomy$description)) {\n    # \u63d0\u53d6\u63cf\u8ff0\u4fe1\u606f\n    B&#91;i] =dat$taxonomy$description\n  } else{\n    B&#91;i] = \"\"\n  }\n  \n  if (!is.null(dat$taxonomy$kingdom)) {\n    #-\u63d0\u53d6kingdom\u5206\u7c7b\n    C&#91;i] = dat$taxonomy$kingdom\n  } else{\n    B&#91;i] = \"\"\n  }\n  \n  if (!is.null(dat$taxonomy$super_class)) {\n    # \u63d0\u53d6super_class\u5206\u7c7b\n    D&#91;i] = dat$taxonomy$super_class\n  } else{\n    B&#91;i] = \"\"\n  }\n  \n  if (!is.null(dat$taxonomy$class)) {\n    # \u63d0\u53d6class\n    E&#91;i] = dat$taxonomy$class\n  } else{\n    B&#91;i] = \"\"\n  }\n  \n  if (!is.null(dat$taxonomy$sub_class)) {\n    # \u63d0\u53d6sub_class\u5206\u7c7b\n    G&#91;i]= dat$taxonomy$sub_class\n  } else{\n    B&#91;i] = \"\"\n  }\n  \n  print(i)\n  \n}\n\n# \u5408\u5e76\u5168\u90e8\u5185\u5bb9\ntax.hmdb = data.frame(\n  KEGGID = A,\n  Descrip = B,\n  Kingdom = C,\n  Super_class = D,\n  Class = E,\n  Sub_class = G)\n<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full\"><div class='fancybox-wrapper lazyload-container-unload' data-fancybox='post-images' href='https:\/\/linguopeng.top\/wp-content\/uploads\/2024\/11\/1-1.png'><img class=\"lazyload lazyload-style-1\" src=\"data:image\/svg+xml;base64,PCEtLUFyZ29uTG9hZGluZy0tPgo8c3ZnIHdpZHRoPSIxIiBoZWlnaHQ9IjEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgc3Ryb2tlPSIjZmZmZmZmMDAiPjxnPjwvZz4KPC9zdmc+\"  loading=\"lazy\" decoding=\"async\" width=\"147\" height=\"47\" data-original=\"https:\/\/linguopeng.top\/wp-content\/uploads\/2024\/11\/1-1.png\" src=\"data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsQAAA7EAZUrDhsAAAANSURBVBhXYzh8+PB\/AAffA0nNPuCLAAAAAElFTkSuQmCC\" alt=\"\" class=\"wp-image-829\"\/><\/div><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full\"><div class='fancybox-wrapper lazyload-container-unload' data-fancybox='post-images' href='https:\/\/linguopeng.top\/wp-content\/uploads\/2024\/11\/2.png'><img class=\"lazyload lazyload-style-1\" src=\"data:image\/svg+xml;base64,PCEtLUFyZ29uTG9hZGluZy0tPgo8c3ZnIHdpZHRoPSIxIiBoZWlnaHQ9IjEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgc3Ryb2tlPSIjZmZmZmZmMDAiPjxnPjwvZz4KPC9zdmc+\"  loading=\"lazy\" decoding=\"async\" width=\"1264\" height=\"88\" data-original=\"https:\/\/linguopeng.top\/wp-content\/uploads\/2024\/11\/2.png\" src=\"data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsQAAA7EAZUrDhsAAAANSURBVBhXYzh8+PB\/AAffA0nNPuCLAAAAAElFTkSuQmCC\" alt=\"\" class=\"wp-image-830\"\/><\/div><\/figure>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-826","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"_links":{"self":[{"href":"https:\/\/linguopeng.top\/index.php?rest_route=\/wp\/v2\/posts\/826","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/linguopeng.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/linguopeng.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/linguopeng.top\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/linguopeng.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=826"}],"version-history":[{"count":3,"href":"https:\/\/linguopeng.top\/index.php?rest_route=\/wp\/v2\/posts\/826\/revisions"}],"predecessor-version":[{"id":832,"href":"https:\/\/linguopeng.top\/index.php?rest_route=\/wp\/v2\/posts\/826\/revisions\/832"}],"wp:attachment":[{"href":"https:\/\/linguopeng.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=826"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/linguopeng.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=826"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/linguopeng.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=826"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}