[{"data":1,"prerenderedAt":1374},["ShallowReactive",2],{"doc-\u002Fdocs\u002Fobservabilidade\u002Fmetricas-logs":3,"docs-all":1303},{"id":4,"title":5,"body":6,"category":1287,"description":1288,"draft":1289,"extension":1290,"icon":1291,"lastReviewed":1292,"meta":1293,"navigation":345,"order":47,"path":1294,"prerequisites":1295,"readingTime":1296,"seo":1297,"stem":1298,"tags":1299,"__hash__":1302},"docs_pt\u002Fdocs\u002Fobservabilidade\u002Fmetricas-logs.md","Métricas e logs",{"type":7,"value":8,"toc":1268},"minimark",[9,13,16,21,26,34,70,73,81,85,153,156,160,167,239,255,266,269,273,276,292,298,302,306,309,313,396,400,507,511,514,589,592,596,599,792,795,799,802,1020,1023,1033,1037,1040,1091,1094,1097,1142,1146,1149,1238,1241,1245,1264],[10,11,12],"p",{},"Observabilidade costuma exigir uma pilha de software paralela ao cluster: agente de métrica em cada nó, série temporal central, agregador de logs, dashboard, alertador, tracer. Seis componentes, cada um com sua configuração, sua atualização, sua conta.",[10,14,15],{},"O HeroCtl resolve isso por dentro. Métricas, logs, alertas e tracing já vêm embutidos no plano de controle. 
Você só pluga ferramenta externa quando o time tem motivo concreto para isso.",[17,18,20],"h2",{"id":19},"metricas","Métricas",[22,23,25],"h3",{"id":24},"o-endpoint-padrao","O endpoint padrão",[10,27,28,29,33],{},"Cada nó servidor expõe métricas em formato Prometheus em ",[30,31,32],"code",{},"\u002Fv1\u002Fmetrics",":",[35,36,41],"pre",{"className":37,"code":38,"language":39,"meta":40,"style":40},"language-bash shiki shiki-themes github-dark-default","curl -H \"X-Heroctl-Token: $TOKEN\" https:\u002F\u002Fmanage.exemplo.com\u002Fv1\u002Fmetrics\n","bash","",[30,42,43],{"__ignoreMap":40},[44,45,48,52,56,60,64,67],"span",{"class":46,"line":47},"line",1,[44,49,51],{"class":50},"sQhOw","curl",[44,53,55],{"class":54},"sFSAA"," -H",[44,57,59],{"class":58},"s9uIt"," \"X-Heroctl-Token: ",[44,61,63],{"class":62},"sZEs4","$TOKEN",[44,65,66],{"class":58},"\"",[44,68,69],{"class":58}," https:\u002F\u002Fmanage.exemplo.com\u002Fv1\u002Fmetrics\n",[10,71,72],{},"Saída típica (recortada):",[35,74,79],{"className":75,"code":77,"language":78},[76],"language-text","# HELP heroctl_node_cpu_usage_percent CPU em uso por nó\n# TYPE heroctl_node_cpu_usage_percent gauge\nheroctl_node_cpu_usage_percent{node=\"server-1\"} 23.4\nheroctl_node_cpu_usage_percent{node=\"server-2\"} 18.1\n\n# HELP heroctl_alloc_memory_bytes Memória usada por alocação\n# TYPE heroctl_alloc_memory_bytes gauge\nheroctl_alloc_memory_bytes{job=\"api\",alloc=\"abc123\"} 285212672\n","text",[30,80,77],{"__ignoreMap":40},[22,82,84],{"id":83},"o-que-vem-pronto","O que vem pronto",[86,87,88,101],"table",{},[89,90,91],"thead",{},[92,93,94,98],"tr",{},[95,96,97],"th",{},"Família de métricas",[95,99,100],{},"Exemplos",[102,103,104,113,121,129,137,145],"tbody",{},[92,105,106,110],{},[107,108,109],"td",{},"Nós",[107,111,112],{},"CPU, RAM, disco, rede, load average, uptime",[92,114,115,118],{},[107,116,117],{},"Alocações",[107,119,120],{},"CPU, RAM, restarts, status, 
idade",[92,122,123,126],{},[107,124,125],{},"Jobs",[107,127,128],{},"Réplicas saudáveis, alocações pendentes, deploys ativos",[92,130,131,134],{},[107,132,133],{},"Roteador",[107,135,136],{},"Requests\u002Fs, latência p50\u002Fp95\u002Fp99, erros 5xx por host",[92,138,139,142],{},[107,140,141],{},"Ingress TLS",[107,143,144],{},"Validade de certificado, falhas de renovação",[92,146,147,150],{},[107,148,149],{},"API interna",[107,151,152],{},"Latência, throughput, taxa de erro",[10,154,155],{},"Em um cluster recém-instalado, isso já alimenta um painel completo sem nenhuma configuração extra.",[22,157,159],{"id":158},"metricas-customizadas-da-aplicacao","Métricas customizadas da aplicação",[10,161,162,163,166],{},"Sua aplicação expõe ",[30,164,165],{},"\u002Fmetrics"," em qualquer porta, em formato Prometheus. Declare no spec:",[35,168,172],{"className":169,"code":170,"language":171,"meta":40,"style":40},"language-yaml shiki shiki-themes github-dark-default","job: api-pagamentos\nmetrics:\n  enabled: true\n  path: \u002Fmetrics\n  port: 9090\n  interval: 15s\n","yaml",[30,173,174,186,195,206,217,228],{"__ignoreMap":40},[44,175,176,180,183],{"class":46,"line":47},[44,177,179],{"class":178},"sPWt5","job",[44,181,182],{"class":62},": ",[44,184,185],{"class":58},"api-pagamentos\n",[44,187,189,192],{"class":46,"line":188},2,[44,190,191],{"class":178},"metrics",[44,193,194],{"class":62},":\n",[44,196,198,201,203],{"class":46,"line":197},3,[44,199,200],{"class":178},"  enabled",[44,202,182],{"class":62},[44,204,205],{"class":54},"true\n",[44,207,209,212,214],{"class":46,"line":208},4,[44,210,211],{"class":178},"  path",[44,213,182],{"class":62},[44,215,216],{"class":58},"\u002Fmetrics\n",[44,218,220,223,225],{"class":46,"line":219},5,[44,221,222],{"class":178},"  port",[44,224,182],{"class":62},[44,226,227],{"class":54},"9090\n",[44,229,231,234,236],{"class":46,"line":230},6,[44,232,233],{"class":178},"  
interval",[44,235,182],{"class":62},[44,237,238],{"class":58},"15s\n",[10,240,241,242,244,245,247,248,247,251,254],{},"O cluster faz o scrape, agrega, e disponibiliza no mesmo endpoint ",[30,243,32],{},". As métricas saem rotuladas com ",[30,246,179],{},", ",[30,249,250],{},"alloc",[30,252,253],{},"node",", então a busca é direta:",[35,256,260],{"className":257,"code":258,"language":259,"meta":40,"style":40},"language-promql shiki shiki-themes github-dark-default","rate(http_requests_total{job=\"api-pagamentos\",status=~\"5..\"}[5m])\n","promql",[30,261,262],{"__ignoreMap":40},[44,263,264],{"class":46,"line":47},[44,265,258],{},[10,267,268],{},"Há bibliotecas cliente Prometheus, oficiais ou mantidas pela comunidade, para Go, Python, Java, Node.js, Ruby, .NET, Rust e PHP. Em qualquer uma dessas linguagens, instrumentar uma aplicação leva quinze minutos.",[17,270,272],{"id":271},"painel-embutido","Painel embutido",[10,274,275],{},"O painel admin (porta 8443) traz uma seção de gráficos pronta:",[277,278,279,283,286,289],"ul",{},[280,281,282],"li",{},"Visão de cluster: CPU, RAM, rede agregadas",[280,284,285],{},"Visão de job: réplicas, restarts, latência do roteador",[280,287,288],{},"Visão de alocação: stream de logs, métricas individuais",[280,290,291],{},"Visão de host: detalhe de cada nó, alocações nele",[10,293,294,295,297],{},"Para a maioria dos times, esse painel substitui um Grafana montado por fora. Quando você precisa de dashboards muito customizados ou de correlação com fontes externas, vale ligar um Grafana ao endpoint ",[30,296,32],{}," como datasource Prometheus comum.",[17,299,301],{"id":300},"logs","Logs",[22,303,305],{"id":304},"modelo-de-coleta","Modelo de coleta",[10,307,308],{},"Cada alocação tem stdout e stderr capturados pelo agente local, comprimidos, e enviados ao escritor central de logs do cluster. 
Não há agente de logs separado para instalar, configurar ou atualizar.",[22,310,312],{"id":311},"tail-em-tempo-real","Tail em tempo real",[35,314,316],{"className":37,"code":315,"language":39,"meta":40,"style":40},"# stream do job inteiro (todas as alocações)\nheroctl logs -f --job api-pagamentos\n\n# uma alocação específica\nheroctl logs -f --alloc abc123\n\n# só stderr\nheroctl logs -f --job api-pagamentos --stream stderr\n",[30,317,318,324,341,347,352,366,370,376],{"__ignoreMap":40},[44,319,320],{"class":46,"line":47},[44,321,323],{"class":322},"sH3jZ","# stream do job inteiro (todas as alocações)\n",[44,325,326,329,332,335,338],{"class":46,"line":188},[44,327,328],{"class":50},"heroctl",[44,330,331],{"class":58}," logs",[44,333,334],{"class":54}," -f",[44,336,337],{"class":54}," --job",[44,339,340],{"class":58}," api-pagamentos\n",[44,342,343],{"class":46,"line":197},[44,344,346],{"emptyLinePlaceholder":345},true,"\n",[44,348,349],{"class":46,"line":208},[44,350,351],{"class":322},"# uma alocação específica\n",[44,353,354,356,358,360,363],{"class":46,"line":219},[44,355,328],{"class":50},[44,357,331],{"class":58},[44,359,334],{"class":54},[44,361,362],{"class":54}," --alloc",[44,364,365],{"class":58}," abc123\n",[44,367,368],{"class":46,"line":230},[44,369,346],{"emptyLinePlaceholder":345},[44,371,373],{"class":46,"line":372},7,[44,374,375],{"class":322},"# só stderr\n",[44,377,379,381,383,385,387,390,393],{"class":46,"line":378},8,[44,380,328],{"class":50},[44,382,331],{"class":58},[44,384,334],{"class":54},[44,386,337],{"class":54},[44,388,389],{"class":58}," api-pagamentos",[44,391,392],{"class":54}," --stream",[44,394,395],{"class":58}," stderr\n",[22,397,399],{"id":398},"filtragem","Filtragem",[35,401,403],{"className":37,"code":402,"language":39,"meta":40,"style":40},"# entre dois timestamps\nheroctl logs --job api-pagamentos \\\n  --since \"2026-04-25 10:00\" \\\n  --until \"2026-04-25 11:00\"\n\n# busca textual\nheroctl logs --job api-pagamentos --since 
1h | grep \"panic\"\n\n# saída estruturada para processar com jq\nheroctl logs --job api-pagamentos --since 1h --format json\n",[30,404,405,410,424,434,442,446,451,476,480,486],{"__ignoreMap":40},[44,406,407],{"class":46,"line":47},[44,408,409],{"class":322},"# entre dois timestamps\n",[44,411,412,414,416,418,420],{"class":46,"line":188},[44,413,328],{"class":50},[44,415,331],{"class":58},[44,417,337],{"class":54},[44,419,389],{"class":58},[44,421,423],{"class":422},"suJrU"," \\\n",[44,425,426,429,432],{"class":46,"line":197},[44,427,428],{"class":54},"  --since",[44,430,431],{"class":58}," \"2026-04-25 10:00\"",[44,433,423],{"class":422},[44,435,436,439],{"class":46,"line":208},[44,437,438],{"class":54},"  --until",[44,440,441],{"class":58}," \"2026-04-25 11:00\"\n",[44,443,444],{"class":46,"line":219},[44,445,346],{"emptyLinePlaceholder":345},[44,447,448],{"class":46,"line":230},[44,449,450],{"class":322},"# busca textual\n",[44,452,453,455,457,459,461,464,467,470,473],{"class":46,"line":372},[44,454,328],{"class":50},[44,456,331],{"class":58},[44,458,337],{"class":54},[44,460,389],{"class":58},[44,462,463],{"class":54}," --since",[44,465,466],{"class":58}," 1h",[44,468,469],{"class":422}," |",[44,471,472],{"class":50}," grep",[44,474,475],{"class":58}," \"panic\"\n",[44,477,478],{"class":46,"line":378},[44,479,346],{"emptyLinePlaceholder":345},[44,481,483],{"class":46,"line":482},9,[44,484,485],{"class":322},"# saída estruturada para processar com jq\n",[44,487,489,491,493,495,497,499,501,504],{"class":46,"line":488},10,[44,490,328],{"class":50},[44,492,331],{"class":58},[44,494,337],{"class":54},[44,496,389],{"class":58},[44,498,463],{"class":54},[44,500,466],{"class":58},[44,502,503],{"class":54}," --format",[44,505,506],{"class":58}," json\n",[22,508,510],{"id":509},"retencao","Retenção",[10,512,513],{},"Default: 30 dias por alocação ativa, 7 dias depois que a alocação termina. 
Configurável no spec do cluster:",[35,515,517],{"className":169,"code":516,"language":171,"meta":40,"style":40},"logs:\n  retention:\n    active_days: 30\n    terminated_days: 7\n  storage:\n    type: local\n    path: \u002Fvar\u002Flib\u002Fheroctl\u002Flogs\n    max_size_gb: 100\n",[30,518,519,525,532,542,552,559,569,579],{"__ignoreMap":40},[44,520,521,523],{"class":46,"line":47},[44,522,300],{"class":178},[44,524,194],{"class":62},[44,526,527,530],{"class":46,"line":188},[44,528,529],{"class":178},"  retention",[44,531,194],{"class":62},[44,533,534,537,539],{"class":46,"line":197},[44,535,536],{"class":178},"    active_days",[44,538,182],{"class":62},[44,540,541],{"class":54},"30\n",[44,543,544,547,549],{"class":46,"line":208},[44,545,546],{"class":178},"    terminated_days",[44,548,182],{"class":62},[44,550,551],{"class":54},"7\n",[44,553,554,557],{"class":46,"line":219},[44,555,556],{"class":178},"  storage",[44,558,194],{"class":62},[44,560,561,564,566],{"class":46,"line":230},[44,562,563],{"class":178},"    type",[44,565,182],{"class":62},[44,567,568],{"class":58},"local\n",[44,570,571,574,576],{"class":46,"line":372},[44,572,573],{"class":178},"    path",[44,575,182],{"class":62},[44,577,578],{"class":58},"\u002Fvar\u002Flib\u002Fheroctl\u002Flogs\n",[44,580,581,584,586],{"class":46,"line":378},[44,582,583],{"class":178},"    max_size_gb",[44,585,182],{"class":62},[44,587,588],{"class":54},"100\n",[10,590,591],{},"Para retenção mais longa, exporte para storage externo (próxima seção).",[22,593,595],{"id":594},"export-para-fora","Export para fora",[10,597,598],{},"Quando você precisa de retenção em anos, ou de correlação com logs de sistemas que não rodam no cluster, há saídas prontas:",[35,600,602],{"className":169,"code":601,"language":171,"meta":40,"style":40},"logs:\n  export:\n    - type: syslog\n      destination: logs.empresa.com.br:514\n      protocol: tcp\n      tls: true\n\n    - type: loki\n      url: https:\u002F\u002Floki.empresa.com.br\n      
tenant: heroctl-prod\n\n    - type: cloudwatch\n      region: us-east-1\n      log_group: \u002Fheroctl\u002Fprod\n      credentials: ${secret.aws_logs}\n\n    - type: elasticsearch\n      url: https:\u002F\u002Felastic.empresa.com.br\n      index: heroctl-%Y.%m.%d\n      credentials: ${secret.es_creds}\n",[30,603,604,610,617,630,640,650,659,663,674,684,694,699,711,722,733,744,749,761,771,782],{"__ignoreMap":40},[44,605,606,608],{"class":46,"line":47},[44,607,300],{"class":178},[44,609,194],{"class":62},[44,611,612,615],{"class":46,"line":188},[44,613,614],{"class":178},"  export",[44,616,194],{"class":62},[44,618,619,622,625,627],{"class":46,"line":197},[44,620,621],{"class":62},"    - ",[44,623,624],{"class":178},"type",[44,626,182],{"class":62},[44,628,629],{"class":58},"syslog\n",[44,631,632,635,637],{"class":46,"line":208},[44,633,634],{"class":178},"      destination",[44,636,182],{"class":62},[44,638,639],{"class":58},"logs.empresa.com.br:514\n",[44,641,642,645,647],{"class":46,"line":219},[44,643,644],{"class":178},"      protocol",[44,646,182],{"class":62},[44,648,649],{"class":58},"tcp\n",[44,651,652,655,657],{"class":46,"line":230},[44,653,654],{"class":178},"      tls",[44,656,182],{"class":62},[44,658,205],{"class":54},[44,660,661],{"class":46,"line":372},[44,662,346],{"emptyLinePlaceholder":345},[44,664,665,667,669,671],{"class":46,"line":378},[44,666,621],{"class":62},[44,668,624],{"class":178},[44,670,182],{"class":62},[44,672,673],{"class":58},"loki\n",[44,675,676,679,681],{"class":46,"line":482},[44,677,678],{"class":178},"      url",[44,680,182],{"class":62},[44,682,683],{"class":58},"https:\u002F\u002Floki.empresa.com.br\n",[44,685,686,689,691],{"class":46,"line":488},[44,687,688],{"class":178},"      
tenant",[44,690,182],{"class":62},[44,692,693],{"class":58},"heroctl-prod\n",[44,695,697],{"class":46,"line":696},11,[44,698,346],{"emptyLinePlaceholder":345},[44,700,702,704,706,708],{"class":46,"line":701},12,[44,703,621],{"class":62},[44,705,624],{"class":178},[44,707,182],{"class":62},[44,709,710],{"class":58},"cloudwatch\n",[44,712,714,717,719],{"class":46,"line":713},13,[44,715,716],{"class":178},"      region",[44,718,182],{"class":62},[44,720,721],{"class":58},"us-east-1\n",[44,723,725,728,730],{"class":46,"line":724},14,[44,726,727],{"class":178},"      log_group",[44,729,182],{"class":62},[44,731,732],{"class":58},"\u002Fheroctl\u002Fprod\n",[44,734,736,739,741],{"class":46,"line":735},15,[44,737,738],{"class":178},"      credentials",[44,740,182],{"class":62},[44,742,743],{"class":58},"${secret.aws_logs}\n",[44,745,747],{"class":46,"line":746},16,[44,748,346],{"emptyLinePlaceholder":345},[44,750,752,754,756,758],{"class":46,"line":751},17,[44,753,621],{"class":62},[44,755,624],{"class":178},[44,757,182],{"class":62},[44,759,760],{"class":58},"elasticsearch\n",[44,762,764,766,768],{"class":46,"line":763},18,[44,765,678],{"class":178},[44,767,182],{"class":62},[44,769,770],{"class":58},"https:\u002F\u002Felastic.empresa.com.br\n",[44,772,774,777,779],{"class":46,"line":773},19,[44,775,776],{"class":178},"      index",[44,778,182],{"class":62},[44,780,781],{"class":58},"heroctl-%Y.%m.%d\n",[44,783,785,787,789],{"class":46,"line":784},20,[44,786,738],{"class":178},[44,788,182],{"class":62},[44,790,791],{"class":58},"${secret.es_creds}\n",[10,793,794],{},"Vários destinos podem rodar ao mesmo tempo. 
O cluster mantém a cópia local pelo período de retenção e replica para os destinos configurados.",[17,796,798],{"id":797},"alertas","Alertas",[10,800,801],{},"Um alerta é uma expressão sobre métricas que dispara um webhook quando verdadeira por tempo configurado:",[35,803,805],{"className":169,"code":804,"language":171,"meta":40,"style":40},"alerts:\n  - name: api-erro-alto\n    expr: |\n      rate(http_requests_total{job=\"api-pagamentos\",status=~\"5..\"}[5m])\n        \u002F rate(http_requests_total{job=\"api-pagamentos\"}[5m]) > 0.05\n    for: 5m\n    severity: critical\n    annotations:\n      summary: \"Taxa de erro acima de 5% em api-pagamentos\"\n      runbook: https:\u002F\u002Fwiki.empresa.com.br\u002Frunbook\u002Fapi-pagamentos\n\n    notify:\n      - type: slack\n        webhook: ${secret.slack_oncall}\n      - type: pagerduty\n        routing_key: ${secret.pagerduty_critical}\n\n  - name: certificado-expirando\n    expr: heroctl_ingress_cert_expiry_days \u003C 14\n    for: 1h\n    severity: warning\n    notify:\n      - type: discord\n        webhook: ${secret.discord_ops}\n",[30,806,807,814,827,837,842,847,857,867,874,884,894,898,905,917,927,938,948,952,963,972,981,991,998,1010],{"__ignoreMap":40},[44,808,809,812],{"class":46,"line":47},[44,810,811],{"class":178},"alerts",[44,813,194],{"class":62},[44,815,816,819,822,824],{"class":46,"line":188},[44,817,818],{"class":62},"  - ",[44,820,821],{"class":178},"name",[44,823,182],{"class":62},[44,825,826],{"class":58},"api-erro-alto\n",[44,828,829,832,834],{"class":46,"line":197},[44,830,831],{"class":178},"    expr",[44,833,182],{"class":62},[44,835,836],{"class":422},"|\n",[44,838,839],{"class":46,"line":208},[44,840,841],{"class":58},"      rate(http_requests_total{job=\"api-pagamentos\",status=~\"5..\"}[5m])\n",[44,843,844],{"class":46,"line":219},[44,845,846],{"class":58},"        \u002F rate(http_requests_total{job=\"api-pagamentos\"}[5m]) > 
0.05\n",[44,848,849,852,854],{"class":46,"line":230},[44,850,851],{"class":178},"    for",[44,853,182],{"class":62},[44,855,856],{"class":58},"5m\n",[44,858,859,862,864],{"class":46,"line":372},[44,860,861],{"class":178},"    severity",[44,863,182],{"class":62},[44,865,866],{"class":58},"critical\n",[44,868,869,872],{"class":46,"line":378},[44,870,871],{"class":178},"    annotations",[44,873,194],{"class":62},[44,875,876,879,881],{"class":46,"line":482},[44,877,878],{"class":178},"      summary",[44,880,182],{"class":62},[44,882,883],{"class":58},"\"Taxa de erro acima de 5% em api-pagamentos\"\n",[44,885,886,889,891],{"class":46,"line":488},[44,887,888],{"class":178},"      runbook",[44,890,182],{"class":62},[44,892,893],{"class":58},"https:\u002F\u002Fwiki.empresa.com.br\u002Frunbook\u002Fapi-pagamentos\n",[44,895,896],{"class":46,"line":696},[44,897,346],{"emptyLinePlaceholder":345},[44,899,900,903],{"class":46,"line":701},[44,901,902],{"class":178},"    notify",[44,904,194],{"class":62},[44,906,907,910,912,914],{"class":46,"line":713},[44,908,909],{"class":62},"      - ",[44,911,624],{"class":178},[44,913,182],{"class":62},[44,915,916],{"class":58},"slack\n",[44,918,919,922,924],{"class":46,"line":724},[44,920,921],{"class":178},"        webhook",[44,923,182],{"class":62},[44,925,926],{"class":58},"${secret.slack_oncall}\n",[44,928,929,931,933,935],{"class":46,"line":735},[44,930,909],{"class":62},[44,932,624],{"class":178},[44,934,182],{"class":62},[44,936,937],{"class":58},"pagerduty\n",[44,939,940,943,945],{"class":46,"line":746},[44,941,942],{"class":178},"        
routing_key",[44,944,182],{"class":62},[44,946,947],{"class":58},"${secret.pagerduty_critical}\n",[44,949,950],{"class":46,"line":751},[44,951,346],{"emptyLinePlaceholder":345},[44,953,954,956,958,960],{"class":46,"line":763},[44,955,818],{"class":62},[44,957,821],{"class":178},[44,959,182],{"class":62},[44,961,962],{"class":58},"certificado-expirando\n",[44,964,965,967,969],{"class":46,"line":773},[44,966,831],{"class":178},[44,968,182],{"class":62},[44,970,971],{"class":58},"heroctl_ingress_cert_expiry_days \u003C 14\n",[44,973,974,976,978],{"class":46,"line":784},[44,975,851],{"class":178},[44,977,182],{"class":62},[44,979,980],{"class":58},"1h\n",[44,982,984,986,988],{"class":46,"line":983},21,[44,985,861],{"class":178},[44,987,182],{"class":62},[44,989,990],{"class":58},"warning\n",[44,992,994,996],{"class":46,"line":993},22,[44,995,902],{"class":178},[44,997,194],{"class":62},[44,999,1001,1003,1005,1007],{"class":46,"line":1000},23,[44,1002,909],{"class":62},[44,1004,624],{"class":178},[44,1006,182],{"class":62},[44,1008,1009],{"class":58},"discord\n",[44,1011,1013,1015,1017],{"class":46,"line":1012},24,[44,1014,921],{"class":178},[44,1016,182],{"class":62},[44,1018,1019],{"class":58},"${secret.discord_ops}\n",[10,1021,1022],{},"Canais suportados de fábrica: Slack, Discord, PagerDuty, Opsgenie, webhook genérico. Para quem quer integração custom (Telegram, e-mail, SMS), o webhook genérico cobre tudo.",[1024,1025,1026],"blockquote",{},[10,1027,1028,1032],{},[1029,1030,1031],"strong",{},"Atenção:"," Comece com poucos alertas críticos. Vinte alertas barulhentos viram zero alertas — o time aprende a ignorar. 
Cinco alertas que sempre indicam problema real são úteis.",[17,1034,1036],{"id":1035},"tracing-distribuido","Tracing distribuído",[10,1038,1039],{},"Tracing está disponível como opt-in no spec do job:",[35,1041,1043],{"className":169,"code":1042,"language":171,"meta":40,"style":40},"job: api-pagamentos\ntracing:\n  enabled: true\n  protocol: otlp\n  sample_rate: 0.1   # 10% das requisições\n",[30,1044,1045,1053,1060,1068,1078],{"__ignoreMap":40},[44,1046,1047,1049,1051],{"class":46,"line":47},[44,1048,179],{"class":178},[44,1050,182],{"class":62},[44,1052,185],{"class":58},[44,1054,1055,1058],{"class":46,"line":188},[44,1056,1057],{"class":178},"tracing",[44,1059,194],{"class":62},[44,1061,1062,1064,1066],{"class":46,"line":197},[44,1063,200],{"class":178},[44,1065,182],{"class":62},[44,1067,205],{"class":54},[44,1069,1070,1073,1075],{"class":46,"line":208},[44,1071,1072],{"class":178},"  protocol",[44,1074,182],{"class":62},[44,1076,1077],{"class":58},"otlp\n",[44,1079,1080,1083,1085,1088],{"class":46,"line":219},[44,1081,1082],{"class":178},"  sample_rate",[44,1084,182],{"class":62},[44,1086,1087],{"class":54},"0.1",[44,1089,1090],{"class":322},"   # 10% das requisições\n",[10,1092,1093],{},"A aplicação instrumentada com OpenTelemetry envia para o coletor embutido. 
O painel mostra traces correlacionados com logs e métricas da mesma alocação.",[10,1095,1096],{},"Para visualização avançada (timeline de spans, comparação entre traces, análise de cauda), exporte para Jaeger, Tempo ou um SaaS como Honeycomb:",[35,1098,1100],{"className":169,"code":1099,"language":171,"meta":40,"style":40},"tracing:\n  export:\n    - type: otlp\n      endpoint: tempo.empresa.com.br:4317\n      tls: true\n",[30,1101,1102,1108,1114,1124,1134],{"__ignoreMap":40},[44,1103,1104,1106],{"class":46,"line":47},[44,1105,1057],{"class":178},[44,1107,194],{"class":62},[44,1109,1110,1112],{"class":46,"line":188},[44,1111,614],{"class":178},[44,1113,194],{"class":62},[44,1115,1116,1118,1120,1122],{"class":46,"line":197},[44,1117,621],{"class":62},[44,1119,624],{"class":178},[44,1121,182],{"class":62},[44,1123,1077],{"class":58},[44,1125,1126,1129,1131],{"class":46,"line":208},[44,1127,1128],{"class":178},"      endpoint",[44,1130,182],{"class":62},[44,1132,1133],{"class":58},"tempo.empresa.com.br:4317\n",[44,1135,1136,1138,1140],{"class":46,"line":219},[44,1137,654],{"class":178},[44,1139,182],{"class":62},[44,1141,205],{"class":54},[17,1143,1145],{"id":1144},"comparacao-de-custo","Comparação de custo",[10,1147,1148],{},"Para um cluster típico — 4 nós, 30 jobs, 100 milhões de requests\u002Fmês — um stack de observabilidade SaaS comercial fica entre R$ 1.000 e R$ 2.000 por mês. 
Um stack auto-hospedado equivalente (Prometheus + Loki + Grafana + Alertmanager + Tempo) tem custo direto baixo, mas exige meio dia de operação por semana.",[86,1150,1151,1167],{},[89,1152,1153],{},[92,1154,1155,1158,1161,1164],{},[95,1156,1157],{},"Item",[95,1159,1160],{},"Stack interno",[95,1162,1163],{},"SaaS comercial",[95,1165,1166],{},"Stack auto-hospedado",[102,1168,1169,1183,1197,1211,1225],{},[92,1170,1171,1174,1177,1180],{},[107,1172,1173],{},"Custo direto\u002Fmês",[107,1175,1176],{},"R$ 0",[107,1178,1179],{},"R$ 1.000–2.000",[107,1181,1182],{},"R$ 100–300 (infra)",[92,1184,1185,1188,1191,1194],{},[107,1186,1187],{},"Tempo de setup",[107,1189,1190],{},"0 (já roda)",[107,1192,1193],{},"1 dia",[107,1195,1196],{},"1 a 2 semanas",[92,1198,1199,1202,1205,1208],{},[107,1200,1201],{},"Manutenção",[107,1203,1204],{},"Junto do cluster",[107,1206,1207],{},"Zero",[107,1209,1210],{},"Algumas horas\u002Fsemana",[92,1212,1213,1216,1219,1222],{},[107,1214,1215],{},"Limites",[107,1217,1218],{},"Para times até ~50 jobs",[107,1220,1221],{},"Praticamente ilimitado",[107,1223,1224],{},"O que sua infra aguentar",[92,1226,1227,1230,1232,1235],{},[107,1228,1229],{},"Customização de dashboard",[107,1231,272],{},[107,1233,1234],{},"Alta",[107,1236,1237],{},"Total",[10,1239,1240],{},"Recomendação prática: comece pelo stack interno. Quando a operação cresce além do que ele atende — geralmente além de 50 jobs ou retenção de log acima de 6 meses — exporte para Loki e Grafana auto-hospedados. 
SaaS comercial só vale quando o tempo do time é mais caro que a fatura.",[17,1242,1244],{"id":1243},"proximos-passos","Próximos passos",[277,1246,1247,1256],{},[280,1248,1249,1250,1255],{},"Configurar ",[1251,1252,1254],"a",{"href":1253},"#alertas","alertas conectados ao Slack ou PagerDuty"," antes do primeiro deploy crítico.",[280,1257,1258,1259,1263],{},"Revisar ",[1251,1260,1262],{"href":1261},"\u002Fdocs\u002Fseguranca\u002Frbac","RBAC"," para limitar quem vê quais logs (logs podem conter dado sensível).",[1265,1266,1267],"style",{},"html pre.shiki code .sQhOw, html code.shiki .sQhOw{--shiki-default:#FFA657}html pre.shiki code .sFSAA, html code.shiki .sFSAA{--shiki-default:#79C0FF}html pre.shiki code .s9uIt, html code.shiki .s9uIt{--shiki-default:#A5D6FF}html pre.shiki code .sZEs4, html code.shiki .sZEs4{--shiki-default:#E6EDF3}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sPWt5, html code.shiki .sPWt5{--shiki-default:#7EE787}html pre.shiki code .sH3jZ, html code.shiki .sH3jZ{--shiki-default:#8B949E}html pre.shiki code .suJrU, html code.shiki 
.suJrU{--shiki-default:#FF7B72}",{"title":40,"searchDepth":188,"depth":188,"links":1269},[1270,1275,1276,1283,1284,1285,1286],{"id":19,"depth":188,"text":20,"children":1271},[1272,1273,1274],{"id":24,"depth":197,"text":25},{"id":83,"depth":197,"text":84},{"id":158,"depth":197,"text":159},{"id":271,"depth":188,"text":272},{"id":300,"depth":188,"text":301,"children":1277},[1278,1279,1280,1281,1282],{"id":304,"depth":197,"text":305},{"id":311,"depth":197,"text":312},{"id":398,"depth":197,"text":399},{"id":509,"depth":197,"text":510},{"id":594,"depth":197,"text":595},{"id":797,"depth":188,"text":798},{"id":1035,"depth":188,"text":1036},{"id":1144,"depth":188,"text":1145},{"id":1243,"depth":188,"text":1244},"observabilidade","Coleta de métricas, logs e traces sem montar uma pilha de observabilidade externa. Quando vale, e quando integrar com ferramenta de fora.",false,"md","i-lucide-activity","2026-04-26",{},"\u002Fdocs\u002Fobservabilidade\u002Fmetricas-logs",[],"10 min",{"title":5,"description":1288},"docs\u002Fobservabilidade\u002Fmetricas-logs",[19,300,1300,1301,797],"prometheus","opentelemetry","wQ4AKaXCnJ4qsvfyCcbYm-wCiuugGCdbkA-aKHtQSJE",[1304,1310,1316,1321,1326,1327,1333,1338,1343,1348,1354,1358,1363,1368],{"path":1305,"title":1306,"description":1307,"category":1308,"order":47,"icon":1309},"\u002Fdocs\u002Fapi\u002Freferencia-api","Referência da API REST","Endpoints, autenticação JWT, exemplos com curl e padrões de erro da API do HeroCtl.","api","i-lucide-code",{"path":1311,"title":1312,"description":1313,"category":1314,"order":47,"icon":1315},"\u002Fdocs\u002Fdeploy\u002Fprimeiro-deploy","Deploy do primeiro app","Suba uma aplicação Node.js com banco Postgres em 50 linhas de YAML. 
Inclui health check, rolling deploy e rollback.","deploy","i-lucide-rocket",{"path":1317,"title":1318,"description":1319,"category":1314,"order":188,"icon":1320},"\u002Fdocs\u002Fdeploy\u002Frolling-canary-bluegreen","Rolling, canary, blue-green e rainbow","Quatro estratégias de deploy. Quando usar cada uma, com exemplos completos e trade-offs honestos.","i-lucide-git-branch",{"path":1322,"title":1323,"description":1324,"category":1287,"order":188,"icon":1325},"\u002Fdocs\u002Fobservabilidade\u002Fbackup-restore","Backup e restore do estado do cluster","Como salvar, agendar e restaurar snapshots do plano de controle do HeroCtl. Estratégia de disaster recovery.","i-lucide-archive",{"path":1294,"title":5,"description":1288,"category":1287,"order":47,"icon":1291},{"path":1328,"title":1329,"description":1330,"category":1331,"order":197,"icon":1332},"\u002Fdocs\u002Foperacoes\u002Fcomandos-cli","Referência completa do CLI","Todos os comandos heroctl com sinopse, flags e exemplo. Use como cola de mesa.","operacoes","i-lucide-terminal",{"path":1334,"title":1335,"description":1336,"category":1331,"order":47,"icon":1337},"\u002Fdocs\u002Foperacoes\u002Finstalacao","Instalação","Instale o HeroCtl em qualquer servidor Linux com Docker em um único comando. Cobre pré-requisitos, bootstrap e verificação.","i-lucide-download",{"path":1339,"title":1340,"description":1341,"category":1331,"order":208,"icon":1342},"\u002Fdocs\u002Foperacoes\u002Fmulti-region","Multi-region (em planejamento Q4 2026)","O que esperar de multi-region no HeroCtl, como rodar em várias regiões hoje e o roadmap até 2027.","i-lucide-globe",{"path":1344,"title":1345,"description":1346,"category":1331,"order":188,"icon":1347},"\u002Fdocs\u002Foperacoes\u002Fprimeiro-cluster","Subir cluster de 3 nós","Forme um cluster com 3 servidores em menos de 10 minutos. 
Tolera falha de 1 nó sem indisponibilidade.","i-lucide-network",{"path":1349,"title":1350,"description":1351,"category":1352,"order":188,"icon":1353},"\u002Fdocs\u002Frede\u002Ffirewall","Configuração de firewall","Quais portas o HeroCtl usa, quais precisam ficar abertas, e quais nunca deveriam ser expostas à internet.","rede","i-lucide-shield",{"path":1355,"title":1356,"description":1357,"category":1352,"order":47,"icon":1342},"\u002Fdocs\u002Frede\u002Fingress-tls","Ingress e TLS automático","Como expor aplicações pela porta 443 com certificados emitidos e renovados automaticamente, sem operar um roteador externo.",{"path":1261,"title":1359,"description":1360,"category":1361,"order":188,"icon":1362},"RBAC e controle de acesso (Business+)","Modelo de papéis, políticas e tokens para limitar quem pode submeter, ler e operar o cluster.","seguranca","i-lucide-users",{"path":1364,"title":1365,"description":1366,"category":1361,"order":47,"icon":1367},"\u002Fdocs\u002Fseguranca\u002Fsecrets","Gerenciamento de segredos","Como guardar senhas, tokens e chaves fora do spec do job, com criptografia em repouso e rotação versionada.","i-lucide-key",{"path":1369,"title":1370,"description":1371,"category":1372,"order":47,"icon":1373},"\u002Fdocs\u002Ftroubleshooting\u002Fproblemas-comuns","Troubleshooting de problemas comuns","Os 12 problemas mais frequentes em clusters HeroCtl, com sintoma, diagnóstico e correção passo a passo.","troubleshooting","i-lucide-alert-triangle",1777362179450]