[{"data":1,"prerenderedAt":1629},["ShallowReactive",2],{"doc-\u002Fdocs\u002Ftroubleshooting\u002Fproblemas-comuns":3,"docs-all":1562},{"id":4,"title":5,"body":6,"category":1545,"description":1546,"draft":1547,"extension":1548,"icon":1549,"lastReviewed":1550,"meta":1551,"navigation":227,"order":44,"path":1552,"prerequisites":1553,"readingTime":1554,"seo":1555,"stem":1556,"tags":1557,"__hash__":1561},"docs_pt\u002Fdocs\u002Ftroubleshooting\u002Fproblemas-comuns.md","Troubleshooting de problemas comuns",{"type":7,"value":8,"toc":1529},"minimark",[9,13,18,25,30,88,91,96,99,132,135,175,179,192,196,199,314,318,339,371,374,378,387,391,404,407,411,414,436,439,460,473,477,490,494,512,515,519,522,530,568,572,577,581,611,618,622,625,689,692,696,701,705,746,749,753,764,768,773,777,801,804,808,811,836,839,843,851,855,884,891,895,898,968,971,988,992,1005,1009,1012,1047,1058,1062,1065,1102,1105,1109,1114,1118,1132,1135,1139,1142,1157,1160,1186,1190,1199,1203,1250,1253,1257,1260,1317,1320,1350,1354,1365,1369,1402,1405,1409,1412,1435,1438,1455,1458,1462,1465,1489,1499,1503,1525],[10,11,12],"p",{},"Este guia cobre os doze problemas que aparecem com mais frequência em clusters HeroCtl. Cada item traz sintoma, diagnóstico e correção. Use como referência rápida em incidente.",[14,15,17],"h2",{"id":16},"_1-cluster-nao-inicia-cannot-bind-to-port-8080","1. Cluster não inicia: \"cannot bind to port 8080\"",[10,19,20,24],{},[21,22,23],"strong",{},"Sintoma:"," o serviço sobe e morre logo em seguida. O log diz que a porta 8080 está em uso.",[10,26,27],{},[21,28,29],{},"Diagnóstico:",[31,32,37],"pre",{"className":33,"code":34,"language":35,"meta":36,"style":36},"language-bash shiki shiki-themes github-dark-default","sudo lsof -i :8080\n# ou\nsudo ss -tlnp | grep 8080\n","bash","",[38,39,40,60,67],"code",{"__ignoreMap":36},[41,42,45,49,53,57],"span",{"class":43,"line":44},"line",1,[41,46,48],{"class":47},"sQhOw","sudo",[41,50,52],{"class":51},"s9uIt"," lsof",[41,54,56],{"class":55},"sFSAA"," -i",[41,58,59],{"class":51}," :8080\n",[41,61,63],{"class":43,"line":62},2,[41,64,66],{"class":65},"sH3jZ","# ou\n",[41,68,70,72,75,78,82,85],{"class":43,"line":69},3,[41,71,48],{"class":47},[41,73,74],{"class":51}," ss",[41,76,77],{"class":55}," -tlnp",[41,79,81],{"class":80},"suJrU"," |",[41,83,84],{"class":47}," grep",[41,86,87],{"class":55}," 8080\n",[10,89,90],{},"A saída mostra qual processo está segurando a porta.",[10,92,93],{},[21,94,95],{},"Correção:",[10,97,98],{},"Se for processo legítimo (outro app), mude a porta do HeroCtl:",[31,100,104],{"className":101,"code":102,"language":103,"meta":36,"style":36},"language-yaml shiki shiki-themes github-dark-default","# \u002Fetc\u002Fheroctl\u002Fserver.yaml\napi:\n  port: 8090\n","yaml",[38,105,106,111,121],{"__ignoreMap":36},[41,107,108],{"class":43,"line":44},[41,109,110],{"class":65},"# \u002Fetc\u002Fheroctl\u002Fserver.yaml\n",[41,112,113,117],{"class":43,"line":62},[41,114,116],{"class":115},"sPWt5","api",[41,118,120],{"class":119},"sZEs4",":\n",[41,122,123,126,129],{"class":43,"line":69},[41,124,125],{"class":115},"  port",[41,127,128],{"class":119},": ",[41,130,131],{"class":55},"8090\n",[10,133,134],{},"Se for processo zumbi (HeroCtl antigo que não morreu direito):",[31,136,138],{"className":33,"code":137,"language":35,"meta":36,"style":36},"sudo kill -9 \u003CPID>\nsudo systemctl start heroctl-server\n",[38,139,140,162],{"__ignoreMap":36},[41,141,142,144,147,150,153,156,159],{"class":43,"line":44},[41,143,48],{"class":47},[41,145,146],{"class":51}," kill",[41,148,149],{"class":55}," -9",[41,151,152],{"class":80}," \u003C",[41,154,155],{"class":51},"PI",[41,157,158],{"class":119},"D",[41,160,161],{"class":80},">\n",[41,163,164,166,169,172],{"class":43,"line":62},[41,165,48],{"class":47},[41,167,168],{"class":51}," systemctl",[41,170,171],{"class":51}," start",[41,173,174],{"class":51}," heroctl-server\n",[14,176,178],{"id":177},"_2-no-nao-consegue-entrar-no-cluster","2. Nó não consegue entrar no cluster",[10,180,181,183,184,187,188,191],{},[21,182,23],{}," comando de join trava ou retorna ",[38,185,186],{},"connection refused"," \u002F ",[38,189,190],{},"invalid token",".",[10,193,194],{},[21,195,29],{},[10,197,198],{},"Três suspeitos comuns:",[31,200,202],{"className":33,"code":201,"language":35,"meta":36,"style":36},"# 1. Token expirou?\nheroctl cluster join-token list\n\n# 2. Firewall bloqueando?\nnc -zv \u003Cip-do-coordenador> 4646\nnc -zv \u003Cip-do-coordenador> 4647\nnc -zv \u003Cip-do-coordenador> 4648\n\n# 3. Relógios fora de sincronia?\ntimedatectl status\n",[38,203,204,209,223,229,235,258,276,294,299,305],{"__ignoreMap":36},[41,205,206],{"class":43,"line":44},[41,207,208],{"class":65},"# 1. Token expirou?\n",[41,210,211,214,217,220],{"class":43,"line":62},[41,212,213],{"class":47},"heroctl",[41,215,216],{"class":51}," cluster",[41,218,219],{"class":51}," join-token",[41,221,222],{"class":51}," list\n",[41,224,225],{"class":43,"line":69},[41,226,228],{"emptyLinePlaceholder":227},true,"\n",[41,230,232],{"class":43,"line":231},4,[41,233,234],{"class":65},"# 2. Firewall bloqueando?\n",[41,236,238,241,244,246,249,252,255],{"class":43,"line":237},5,[41,239,240],{"class":47},"nc",[41,242,243],{"class":55}," -zv",[41,245,152],{"class":80},[41,247,248],{"class":51},"ip-do-coordenado",[41,250,251],{"class":119},"r",[41,253,254],{"class":80},">",[41,256,257],{"class":55}," 4646\n",[41,259,261,263,265,267,269,271,273],{"class":43,"line":260},6,[41,262,240],{"class":47},[41,264,243],{"class":55},[41,266,152],{"class":80},[41,268,248],{"class":51},[41,270,251],{"class":119},[41,272,254],{"class":80},[41,274,275],{"class":55}," 4647\n",[41,277,279,281,283,285,287,289,291],{"class":43,"line":278},7,[41,280,240],{"class":47},[41,282,243],{"class":55},[41,284,152],{"class":80},[41,286,248],{"class":51},[41,288,251],{"class":119},[41,290,254],{"class":80},[41,292,293],{"class":55}," 4648\n",[41,295,297],{"class":43,"line":296},8,[41,298,228],{"emptyLinePlaceholder":227},[41,300,302],{"class":43,"line":301},9,[41,303,304],{"class":65},"# 3. Relógios fora de sincronia?\n",[41,306,308,311],{"class":43,"line":307},10,[41,309,310],{"class":47},"timedatectl",[41,312,313],{"class":51}," status\n",[10,315,316],{},[21,317,95],{},[319,320,321,328,336],"ul",{},[322,323,324,325,191],"li",{},"Token expirado: gere outro com ",[38,326,327],{},"heroctl cluster join-token create --ttl 1h",[322,329,330,331,191],{},"Firewall: libere as portas 4646, 4647 e 4648 entre os nós. Veja ",[332,333,335],"a",{"href":334},"\u002Fdocs\u002Frede\u002Ffirewall","firewall",[322,337,338],{},"Relógio: instale e ative NTP.",[31,340,342],{"className":33,"code":341,"language":35,"meta":36,"style":36},"sudo apt install chrony\nsudo systemctl enable --now chrony\n",[38,343,344,357],{"__ignoreMap":36},[41,345,346,348,351,354],{"class":43,"line":44},[41,347,48],{"class":47},[41,349,350],{"class":51}," apt",[41,352,353],{"class":51}," install",[41,355,356],{"class":51}," chrony\n",[41,358,359,361,363,366,369],{"class":43,"line":62},[41,360,48],{"class":47},[41,362,168],{"class":51},[41,364,365],{"class":51}," enable",[41,367,368],{"class":55}," --now",[41,370,356],{"class":51},[10,372,373],{},"Diferença maior que 30 segundos entre nós quebra a coordenação.",[14,375,377],{"id":376},"_3-cluster-perdeu-coordenacao","3. Cluster perdeu coordenação",[10,379,380,382,383,386],{},[21,381,23],{}," API responde com ",[38,384,385],{},"503"," e mensagens sobre falta de coordenador. Mudanças não são aceitas.",[10,388,389],{},[21,390,29],{},[31,392,394],{"className":33,"code":393,"language":35,"meta":36,"style":36},"heroctl cluster status\n",[38,395,396],{"__ignoreMap":36},[41,397,398,400,402],{"class":43,"line":44},[41,399,213],{"class":47},[41,401,216],{"class":51},[41,403,313],{"class":51},[10,405,406],{},"Você verá quantos nós estão saudáveis. Se menos da metade respondem, o cluster trava em modo somente-leitura por segurança.",[10,408,409],{},[21,410,95],{},[10,412,413],{},"A solução normal é trazer os nós caídos de volta:",[31,415,417],{"className":33,"code":416,"language":35,"meta":36,"style":36},"ssh nó-caído sudo systemctl start heroctl-server\n",[38,418,419],{"__ignoreMap":36},[41,420,421,424,427,430,432,434],{"class":43,"line":44},[41,422,423],{"class":47},"ssh",[41,425,426],{"class":51}," nó-caído",[41,428,429],{"class":51}," sudo",[41,431,168],{"class":51},[41,433,171],{"class":51},[41,435,174],{"class":51},[10,437,438],{},"Se eles não voltam (disco morto, máquina perdida), use bootstrap forçado a partir do snapshot mais recente:",[31,440,442],{"className":33,"code":441,"language":35,"meta":36,"style":36},"heroctl snapshot restore \u002Fbackups\u002Fultimo.tar.gz --force-bootstrap\n",[38,443,444],{"__ignoreMap":36},[41,445,446,448,451,454,457],{"class":43,"line":44},[41,447,213],{"class":47},[41,449,450],{"class":51}," snapshot",[41,452,453],{"class":51}," restore",[41,455,456],{"class":51}," \u002Fbackups\u002Fultimo.tar.gz",[41,458,459],{"class":55}," --force-bootstrap\n",[461,462,463],"blockquote",{},[10,464,465,468,469,191],{},[21,466,467],{},"Atenção:"," bootstrap forçado descarta tudo que aconteceu depois do snapshot. Veja ",[332,470,472],{"href":471},"\u002Fdocs\u002Fobservabilidade\u002Fbackup-restore","backup e restore",[14,474,476],{"id":475},"_4-job-fica-em-pending","4. Job fica em \"pending\"",[10,478,479,481,482,485,486,489],{},[21,480,23],{}," ",[38,483,484],{},"heroctl jobs status meu-job"," mostra ",[38,487,488],{},"pending"," por minutos. Nada inicia.",[10,491,492],{},[21,493,29],{},[31,495,497],{"className":33,"code":496,"language":35,"meta":36,"style":36},"heroctl jobs explain meu-job\n",[38,498,499],{"__ignoreMap":36},[41,500,501,503,506,509],{"class":43,"line":44},[41,502,213],{"class":47},[41,504,505],{"class":51}," jobs",[41,507,508],{"class":51}," explain",[41,510,511],{"class":51}," meu-job\n",[10,513,514],{},"A saída detalha por que o agendador não consegue colocar o job. Causa típica: nenhum nó tem CPU\u002FRAM disponível para os recursos pedidos.",[10,516,517],{},[21,518,95],{},[10,520,521],{},"Duas opções:",[319,523,524,527],{},[322,525,526],{},"Adicione mais nós ao cluster.",[322,528,529],{},"Reduza os recursos exigidos pelo job:",[31,531,533],{"className":101,"code":532,"language":103,"meta":36,"style":36},"resources:\n  cpu_mhz: 500      # antes era 2000\n  memory_mb: 256    # antes era 1024\n",[38,534,535,542,555],{"__ignoreMap":36},[41,536,537,540],{"class":43,"line":44},[41,538,539],{"class":115},"resources",[41,541,120],{"class":119},[41,543,544,547,549,552],{"class":43,"line":62},[41,545,546],{"class":115},"  cpu_mhz",[41,548,128],{"class":119},[41,550,551],{"class":55},"500",[41,553,554],{"class":65},"      # antes era 2000\n",[41,556,557,560,562,565],{"class":43,"line":69},[41,558,559],{"class":115},"  memory_mb",[41,561,128],{"class":119},[41,563,564],{"class":55},"256",[41,566,567],{"class":65},"    # antes era 1024\n",[14,569,571],{"id":570},"_5-health-check-falhando","5. Health check falhando",[10,573,574,576],{},[21,575,23],{}," o job sobe mas é marcado como não saudável. Reinicia em loop.",[10,578,579],{},[21,580,29],{},[31,582,584],{"className":33,"code":583,"language":35,"meta":36,"style":36},"heroctl logs \u003Calloc-id> | tail -50\n",[38,585,586],{"__ignoreMap":36},[41,587,588,590,593,595,598,601,603,605,608],{"class":43,"line":44},[41,589,213],{"class":47},[41,591,592],{"class":51}," logs",[41,594,152],{"class":80},[41,596,597],{"class":51},"alloc-i",[41,599,600],{"class":119},"d",[41,602,254],{"class":80},[41,604,81],{"class":80},[41,606,607],{"class":47}," tail",[41,609,610],{"class":55}," -50\n",[10,612,613,614,617],{},"Frequentemente o app está demorando mais para subir que o ",[38,615,616],{},"healthy_deadline"," permite.",[10,619,620],{},[21,621,95],{},[10,623,624],{},"Aumente o prazo:",[31,626,628],{"className":101,"code":627,"language":103,"meta":36,"style":36},"health_check:\n  path: \u002Fhealth\n  port: 8080\n  interval: 10s\n  timeout: 3s\n  healthy_deadline: 120s    # era 30s\n",[38,629,630,637,647,656,666,676],{"__ignoreMap":36},[41,631,632,635],{"class":43,"line":44},[41,633,634],{"class":115},"health_check",[41,636,120],{"class":119},[41,638,639,642,644],{"class":43,"line":62},[41,640,641],{"class":115},"  path",[41,643,128],{"class":119},[41,645,646],{"class":51},"\u002Fhealth\n",[41,648,649,651,653],{"class":43,"line":69},[41,650,125],{"class":115},[41,652,128],{"class":119},[41,654,655],{"class":55},"8080\n",[41,657,658,661,663],{"class":43,"line":231},[41,659,660],{"class":115},"  interval",[41,662,128],{"class":119},[41,664,665],{"class":51},"10s\n",[41,667,668,671,673],{"class":43,"line":237},[41,669,670],{"class":115},"  timeout",[41,672,128],{"class":119},[41,674,675],{"class":51},"3s\n",[41,677,678,681,683,686],{"class":43,"line":260},[41,679,680],{"class":115},"  healthy_deadline",[41,682,128],{"class":119},[41,684,685],{"class":51},"120s",[41,687,688],{"class":65},"    # era 30s\n",[10,690,691],{},"Se o app está realmente lento para subir (carrega cache enorme, conecta em vários serviços), trabalhe o tempo de boot. Lazy loading geralmente resolve.",[14,693,695],{"id":694},"_6-certificado-tls-nao-e-emitido","6. Certificado TLS não é emitido",[10,697,698,700],{},[21,699,23],{}," site responde com certificado autoassinado ou erro de TLS. Logs do ingress mencionam falha em emissão automática.",[10,702,703],{},[21,704,29],{},[31,706,708],{"className":33,"code":707,"language":35,"meta":36,"style":36},"# DNS aponta pro IP correto?\ndig +short meudominio.com\n\n# Porta 80 acessível externamente?\ncurl -I http:\u002F\u002Fmeudominio.com\u002F.well-known\u002Facme-challenge\u002Ftest\n",[38,709,710,715,726,730,735],{"__ignoreMap":36},[41,711,712],{"class":43,"line":44},[41,713,714],{"class":65},"# DNS aponta pro IP correto?\n",[41,716,717,720,723],{"class":43,"line":62},[41,718,719],{"class":47},"dig",[41,721,722],{"class":51}," +short",[41,724,725],{"class":51}," meudominio.com\n",[41,727,728],{"class":43,"line":69},[41,729,228],{"emptyLinePlaceholder":227},[41,731,732],{"class":43,"line":231},[41,733,734],{"class":65},"# Porta 80 acessível externamente?\n",[41,736,737,740,743],{"class":43,"line":237},[41,738,739],{"class":47},"curl",[41,741,742],{"class":55}," -I",[41,744,745],{"class":51}," http:\u002F\u002Fmeudominio.com\u002F.well-known\u002Facme-challenge\u002Ftest\n",[10,747,748],{},"A emissão automática de certificado precisa de duas coisas: DNS público apontando para um nó do cluster e a porta 80 aberta para o mundo.",[10,750,751],{},[21,752,95],{},[319,754,755,758,761],{},[322,756,757],{},"DNS errado: corrija o registro A no seu provedor.",[322,759,760],{},"Porta 80 fechada: libere no firewall do servidor e no firewall do provedor (security group, etc).",[322,762,763],{},"Domínio com proxy de CDN ativo: desative o proxy temporariamente para a emissão; reative depois.",[14,765,767],{"id":766},"_7-app-lento-sob-carga","7. App lento sob carga",[10,769,770,772],{},[21,771,23],{}," latência sobe quando o tráfego aumenta. Usuários reclamam.",[10,774,775],{},[21,776,29],{},[31,778,780],{"className":33,"code":779,"language":35,"meta":36,"style":36},"heroctl metrics --job meu-app --since 30m\n",[38,781,782],{"__ignoreMap":36},[41,783,784,786,789,792,795,798],{"class":43,"line":44},[41,785,213],{"class":47},[41,787,788],{"class":51}," metrics",[41,790,791],{"class":55}," --job",[41,793,794],{"class":51}," meu-app",[41,796,797],{"class":55}," --since",[41,799,800],{"class":51}," 30m\n",[10,802,803],{},"Olhe CPU, memória e número de instâncias. Verifique também se há um deploy em curso — deploys gradativos retiram capacidade temporariamente.",[10,805,806],{},[21,807,95],{},[10,809,810],{},"Se está faltando capacidade, escale:",[31,812,814],{"className":33,"code":813,"language":35,"meta":36,"style":36},"heroctl jobs scale meu-app --count 6   # de 3 para 6\n",[38,815,816],{"__ignoreMap":36},[41,817,818,820,822,825,827,830,833],{"class":43,"line":44},[41,819,213],{"class":47},[41,821,505],{"class":51},[41,823,824],{"class":51}," scale",[41,826,794],{"class":51},[41,828,829],{"class":55}," --count",[41,831,832],{"class":55}," 6",[41,834,835],{"class":65},"   # de 3 para 6\n",[10,837,838],{},"Se está deploy em curso, espere ele terminar antes de avaliar. Se o app está com vazamento de memória ou loop apertado, perfile o código — não tem como o orquestrador resolver problema interno do app.",[14,840,842],{"id":841},"_8-logs-nao-aparecem","8. Logs não aparecem",[10,844,845,481,847,850],{},[21,846,23],{},[38,848,849],{},"heroctl logs"," retorna vazio mesmo com app rodando e gerando saída.",[10,852,853],{},[21,854,29],{},[31,856,858],{"className":33,"code":857,"language":35,"meta":36,"style":36},"docker inspect \u003Ccontainer-id> | grep LogConfig\n",[38,859,860],{"__ignoreMap":36},[41,861,862,865,868,870,873,875,877,879,881],{"class":43,"line":44},[41,863,864],{"class":47},"docker",[41,866,867],{"class":51}," inspect",[41,869,152],{"class":80},[41,871,872],{"class":51},"container-i",[41,874,600],{"class":119},[41,876,254],{"class":80},[41,878,81],{"class":80},[41,880,84],{"class":47},[41,882,883],{"class":51}," LogConfig\n",[10,885,886,887,890],{},"Se aparece ",[38,888,889],{},"\"Type\": \"none\""," ou um driver não suportado, o problema está aí.",[10,892,893],{},[21,894,95],{},[10,896,897],{},"Configure o driver de log padrão na máquina:",[31,899,903],{"className":900,"code":901,"language":902,"meta":36,"style":36},"language-json shiki shiki-themes github-dark-default","\u002F\u002F \u002Fetc\u002Fdocker\u002Fdaemon.json\n{\n  \"log-driver\": \"json-file\",\n  \"log-opts\": {\n    \"max-size\": \"100m\",\n    \"max-file\": \"3\"\n  }\n}\n","json",[38,904,905,910,915,928,936,948,958,963],{"__ignoreMap":36},[41,906,907],{"class":43,"line":44},[41,908,909],{"class":65},"\u002F\u002F \u002Fetc\u002Fdocker\u002Fdaemon.json\n",[41,911,912],{"class":43,"line":62},[41,913,914],{"class":119},"{\n",[41,916,917,920,922,925],{"class":43,"line":69},[41,918,919],{"class":115},"  \"log-driver\"",[41,921,128],{"class":119},[41,923,924],{"class":51},"\"json-file\"",[41,926,927],{"class":119},",\n",[41,929,930,933],{"class":43,"line":231},[41,931,932],{"class":115},"  \"log-opts\"",[41,934,935],{"class":119},": {\n",[41,937,938,941,943,946],{"class":43,"line":237},[41,939,940],{"class":115},"    \"max-size\"",[41,942,128],{"class":119},[41,944,945],{"class":51},"\"100m\"",[41,947,927],{"class":119},[41,949,950,953,955],{"class":43,"line":260},[41,951,952],{"class":115},"    \"max-file\"",[41,954,128],{"class":119},[41,956,957],{"class":51},"\"3\"\n",[41,959,960],{"class":43,"line":278},[41,961,962],{"class":119},"  }\n",[41,964,965],{"class":43,"line":296},[41,966,967],{"class":119},"}\n",[10,969,970],{},"Reinicie o serviço:",[31,972,974],{"className":33,"code":973,"language":35,"meta":36,"style":36},"sudo systemctl restart docker\n",[38,975,976],{"__ignoreMap":36},[41,977,978,980,982,985],{"class":43,"line":44},[41,979,48],{"class":47},[41,981,168],{"class":51},[41,983,984],{"class":51}," restart",[41,986,987],{"class":51}," docker\n",[14,989,991],{"id":990},"_9-conexao-com-postgres-dando-timeout","9. Conexão com Postgres dando timeout",[10,993,994,996,997,1000,1001,1004],{},[21,995,23],{}," o app loga ",[38,998,999],{},"connection timeout"," ou ",[38,1002,1003],{},"too many clients"," quando conecta no banco.",[10,1006,1007],{},[21,1008,29],{},[10,1010,1011],{},"No Postgres:",[31,1013,1017],{"className":1014,"code":1015,"language":1016,"meta":36,"style":36},"language-sql shiki shiki-themes github-dark-default","SELECT count(*) FROM pg_stat_activity;\nSHOW max_connections;\n","sql",[38,1018,1019,1042],{"__ignoreMap":36},[41,1020,1021,1024,1027,1030,1033,1036,1039],{"class":43,"line":44},[41,1022,1023],{"class":80},"SELECT",[41,1025,1026],{"class":55}," count",[41,1028,1029],{"class":119},"(",[41,1031,1032],{"class":80},"*",[41,1034,1035],{"class":119},") ",[41,1037,1038],{"class":80},"FROM",[41,1040,1041],{"class":119}," pg_stat_activity;\n",[41,1043,1044],{"class":43,"line":62},[41,1045,1046],{"class":119},"SHOW max_connections;\n",[10,1048,1049,1050,1053,1054,1057],{},"Se ",[38,1051,1052],{},"count(*)"," está perto de ",[38,1055,1056],{},"max_connections",", o pool está saturado.",[10,1059,1060],{},[21,1061,95],{},[10,1063,1064],{},"Coloque um pgbouncer entre app e banco:",[31,1066,1068],{"className":101,"code":1067,"language":103,"meta":36,"style":36},"# job pgbouncer\nconfig:\n  max_client_conn: 1000\n  default_pool_size: 25\n",[38,1069,1070,1075,1082,1092],{"__ignoreMap":36},[41,1071,1072],{"class":43,"line":44},[41,1073,1074],{"class":65},"# job pgbouncer\n",[41,1076,1077,1080],{"class":43,"line":62},[41,1078,1079],{"class":115},"config",[41,1081,120],{"class":119},[41,1083,1084,1087,1089],{"class":43,"line":69},[41,1085,1086],{"class":115},"  max_client_conn",[41,1088,128],{"class":119},[41,1090,1091],{"class":55},"1000\n",[41,1093,1094,1097,1099],{"class":43,"line":231},[41,1095,1096],{"class":115},"  default_pool_size",[41,1098,128],{"class":119},[41,1100,1101],{"class":55},"25\n",[10,1103,1104],{},"E aponte os apps para o pgbouncer em vez do banco direto. Você pode atender milhares de conexões de cliente com poucas conexões reais ao banco.",[14,1106,1108],{"id":1107},"_10-cluster-aparenta-ter-dois-coordenadores","10. Cluster aparenta ter dois coordenadores",[10,1110,1111,1113],{},[21,1112,23],{}," comportamentos estranhos — escrita em um nó não aparece no outro. Métricas inconsistentes entre painéis.",[10,1115,1116],{},[21,1117,29],{},[31,1119,1121],{"className":33,"code":1120,"language":35,"meta":36,"style":36},"heroctl cluster peers\n",[38,1122,1123],{"__ignoreMap":36},[41,1124,1125,1127,1129],{"class":43,"line":44},[41,1126,213],{"class":47},[41,1128,216],{"class":51},[41,1130,1131],{"class":51}," peers\n",[10,1133,1134],{},"Se a lista de pares varia dependendo do nó que você consulta, houve uma divisão de rede e duas metades acharam que eram a metade boa.",[10,1136,1137],{},[21,1138,95],{},[10,1140,1141],{},"Identifique a metade minoritária (a com menos nós) e reinicie esses nós:",[31,1143,1145],{"className":33,"code":1144,"language":35,"meta":36,"style":36},"sudo systemctl restart heroctl-server\n",[38,1146,1147],{"__ignoreMap":36},[41,1148,1149,1151,1153,1155],{"class":43,"line":44},[41,1150,48],{"class":47},[41,1152,168],{"class":51},[41,1154,984],{"class":51},[41,1156,174],{"class":51},[10,1158,1159],{},"Eles re-sincronizam com a metade majoritária e a inconsistência some. Em seguida verifique se algum dado divergiu durante o intervalo:",[31,1161,1163],{"className":33,"code":1162,"language":35,"meta":36,"style":36},"heroctl jobs status --all | grep -i diverge\n",[38,1164,1165],{"__ignoreMap":36},[41,1166,1167,1169,1171,1174,1177,1179,1181,1183],{"class":43,"line":44},[41,1168,213],{"class":47},[41,1170,505],{"class":51},[41,1172,1173],{"class":51}," status",[41,1175,1176],{"class":55}," --all",[41,1178,81],{"class":80},[41,1180,84],{"class":47},[41,1182,56],{"class":55},[41,1184,1185],{"class":51}," diverge\n",[14,1187,1189],{"id":1188},"_11-disco-cheio","11. Disco cheio",[10,1191,1192,1194,1195,1198],{},[21,1193,23],{}," o nó começa a se comportar mal. API lenta. Agente reinicia containers sem motivo aparente. ",[38,1196,1197],{},"df -h"," mostra 100%.",[10,1200,1201],{},[21,1202,29],{},[31,1204,1206],{"className":33,"code":1205,"language":35,"meta":36,"style":36},"sudo du -sh \u002Fvar\u002Flib\u002Fheroctl\u002F* | sort -h\nsudo du -sh \u002Fvar\u002Flog\u002F* | sort -h\n",[38,1207,1208,1231],{"__ignoreMap":36},[41,1209,1210,1212,1215,1218,1221,1223,1225,1228],{"class":43,"line":44},[41,1211,48],{"class":47},[41,1213,1214],{"class":51}," du",[41,1216,1217],{"class":55}," -sh",[41,1219,1220],{"class":51}," \u002Fvar\u002Flib\u002Fheroctl\u002F",[41,1222,1032],{"class":55},[41,1224,81],{"class":80},[41,1226,1227],{"class":47}," sort",[41,1229,1230],{"class":55}," -h\n",[41,1232,1233,1235,1237,1239,1242,1244,1246,1248],{"class":43,"line":62},[41,1234,48],{"class":47},[41,1236,1214],{"class":51},[41,1238,1217],{"class":55},[41,1240,1241],{"class":51}," \u002Fvar\u002Flog\u002F",[41,1243,1032],{"class":55},[41,1245,81],{"class":80},[41,1247,1227],{"class":47},[41,1249,1230],{"class":55},[10,1251,1252],{},"Os culpados de sempre são logs antigos e snapshots não limpos.",[10,1254,1255],{},[21,1256,95],{},[10,1258,1259],{},"Configure rotação:",[31,1261,1263],{"className":101,"code":1262,"language":103,"meta":36,"style":36},"# \u002Fetc\u002Fheroctl\u002Fserver.yaml\nlogs:\n  retention_days: 7\n  max_size_per_alloc_mb: 500\n\nsnapshots:\n  retention_count: 10\n",[38,1264,1265,1269,1276,1286,1296,1300,1307],{"__ignoreMap":36},[41,1266,1267],{"class":43,"line":44},[41,1268,110],{"class":65},[41,1270,1271,1274],{"class":43,"line":62},[41,1272,1273],{"class":115},"logs",[41,1275,120],{"class":119},[41,1277,1278,1281,1283],{"class":43,"line":69},[41,1279,1280],{"class":115},"  retention_days",[41,1282,128],{"class":119},[41,1284,1285],{"class":55},"7\n",[41,1287,1288,1291,1293],{"class":43,"line":231},[41,1289,1290],{"class":115},"  max_size_per_alloc_mb",[41,1292,128],{"class":119},[41,1294,1295],{"class":55},"500\n",[41,1297,1298],{"class":43,"line":237},[41,1299,228],{"emptyLinePlaceholder":227},[41,1301,1302,1305],{"class":43,"line":260},[41,1303,1304],{"class":115},"snapshots",[41,1306,120],{"class":119},[41,1308,1309,1312,1314],{"class":43,"line":278},[41,1310,1311],{"class":115},"  retention_count",[41,1313,128],{"class":119},[41,1315,1316],{"class":55},"10\n",[10,1318,1319],{},"E uma faxina manual imediata:",[31,1321,1323],{"className":33,"code":1322,"language":35,"meta":36,"style":36},"sudo journalctl --vacuum-time=3d\nheroctl snapshot prune --keep 10\n",[38,1324,1325,1335],{"__ignoreMap":36},[41,1326,1327,1329,1332],{"class":43,"line":44},[41,1328,48],{"class":47},[41,1330,1331],{"class":51}," journalctl",[41,1333,1334],{"class":55}," --vacuum-time=3d\n",[41,1336,1337,1339,1341,1344,1347],{"class":43,"line":62},[41,1338,213],{"class":47},[41,1340,450],{"class":51},[41,1342,1343],{"class":51}," prune",[41,1345,1346],{"class":55}," --keep",[41,1348,1349],{"class":55}," 10\n",[14,1351,1353],{"id":1352},"_12-container-morto-por-falta-de-memoria","12. Container morto por falta de memória",[10,1355,1356,481,1358,1360,1361,1364],{},[21,1357,23],{},[38,1359,849],{}," termina com ",[38,1362,1363],{},"OOMKilled",". O container reinicia em loop.",[10,1366,1367],{},[21,1368,29],{},[31,1370,1372],{"className":33,"code":1371,"language":35,"meta":36,"style":36},"heroctl alloc status \u003Cid> | grep -A5 \"memory\"\n",[38,1373,1374],{"__ignoreMap":36},[41,1375,1376,1378,1381,1383,1385,1388,1390,1392,1394,1396,1399],{"class":43,"line":44},[41,1377,213],{"class":47},[41,1379,1380],{"class":51}," alloc",[41,1382,1173],{"class":51},[41,1384,152],{"class":80},[41,1386,1387],{"class":51},"i",[41,1389,600],{"class":119},[41,1391,254],{"class":80},[41,1393,81],{"class":80},[41,1395,84],{"class":47},[41,1397,1398],{"class":55}," -A5",[41,1400,1401],{"class":51}," \"memory\"\n",[10,1403,1404],{},"Compare uso real com o limite definido.",[10,1406,1407],{},[21,1408,95],{},[10,1410,1411],{},"Aumente o limite no spec do job:",[31,1413,1415],{"className":101,"code":1414,"language":103,"meta":36,"style":36},"resources:\n  memory_mb: 1024    # era 512\n",[38,1416,1417,1423],{"__ignoreMap":36},[41,1418,1419,1421],{"class":43,"line":44},[41,1420,539],{"class":115},[41,1422,120],{"class":119},[41,1424,1425,1427,1429,1432],{"class":43,"line":62},[41,1426,559],{"class":115},[41,1428,128],{"class":119},[41,1430,1431],{"class":55},"1024",[41,1433,1434],{"class":65},"    # era 512\n",[10,1436,1437],{},"Suba a nova versão:",[31,1439,1441],{"className":33,"code":1440,"language":35,"meta":36,"style":36},"heroctl jobs submit meu-app.json\n",[38,1442,1443],{"__ignoreMap":36},[41,1444,1445,1447,1449,1452],{"class":43,"line":44},[41,1446,213],{"class":47},[41,1448,505],{"class":51},[41,1450,1451],{"class":51}," submit",[41,1453,1454],{"class":51}," meu-app.json\n",[10,1456,1457],{},"Se o uso de memória cresce com o tempo (vazamento), aumentar o limite só adia o problema. Investigue o app.",[14,1459,1461],{"id":1460},"quando-nada-disso-ajuda","Quando nada disso ajuda",[10,1463,1464],{},"Reúna as seguintes informações antes de abrir um chamado:",[319,1466,1467,1473,1479,1486],{},[322,1468,1469,1472],{},[38,1470,1471],{},"heroctl cluster status"," (saída completa)",[322,1474,1475,1478],{},[38,1476,1477],{},"heroctl version"," em todos os nós",[322,1480,1481,1482,1485],{},"O ",[38,1483,1484],{},"request_id"," retornado pelo erro da API",[322,1487,1488],{},"Recorte do log com timestamp do incidente",[10,1490,1491,1492,1498],{},"Mande para ",[21,1493,1494],{},[332,1495,1497],{"href":1496},"mailto:suporte@heroctl.com","suporte@heroctl.com"," com essas informações no corpo da mensagem. Quanto mais contexto, mais rápida a resposta.",[14,1500,1502],{"id":1501},"proximos-passos","Próximos passos",[319,1504,1505,1512,1518],{},[322,1506,1507,1511],{},[332,1508,1510],{"href":1509},"\u002Fdocs\u002Fobservabilidade\u002Fmetricas-logs","Métricas e alertas"," — detectar problemas antes do usuário.",[322,1513,1514,1517],{},[332,1515,1516],{"href":471},"Backup e restore"," — preparação para os cenários mais graves.",[322,1519,1520,1524],{},[332,1521,1523],{"href":1522},"\u002Fdocs\u002Fapi\u002Freferencia-api","Referência da API"," — quando o CLI não for suficiente.",[1526,1527,1528],"style",{},"html pre.shiki code .sQhOw, html code.shiki .sQhOw{--shiki-default:#FFA657}html pre.shiki code .s9uIt, html code.shiki .s9uIt{--shiki-default:#A5D6FF}html pre.shiki code .sFSAA, html code.shiki .sFSAA{--shiki-default:#79C0FF}html pre.shiki code .sH3jZ, html code.shiki .sH3jZ{--shiki-default:#8B949E}html pre.shiki code .suJrU, html code.shiki .suJrU{--shiki-default:#FF7B72}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sPWt5, html code.shiki .sPWt5{--shiki-default:#7EE787}html pre.shiki code .sZEs4, html code.shiki .sZEs4{--shiki-default:#E6EDF3}",{"title":36,"searchDepth":62,"depth":62,"links":1530},[1531,1532,1533,1534,1535,1536,1537,1538,1539,1540,1541,1542,1543,1544],{"id":16,"depth":62,"text":17},{"id":177,"depth":62,"text":178},{"id":376,"depth":62,"text":377},{"id":475,"depth":62,"text":476},{"id":570,"depth":62,"text":571},{"id":694,"depth":62,"text":695},{"id":766,"depth":62,"text":767},{"id":841,"depth":62,"text":842},{"id":990,"depth":62,"text":991},{"id":1107,"depth":62,"text":1108},{"id":1188,"depth":62,"text":1189},{"id":1352,"depth":62,"text":1353},{"id":1460,"depth":62,"text":1461},{"id":1501,"depth":62,"text":1502},"troubleshooting","Os 12 problemas mais frequentes em clusters HeroCtl, com sintoma, diagnóstico e correção passo a passo.",false,"md","i-lucide-alert-triangle","2026-04-26",{},"\u002Fdocs\u002Ftroubleshooting\u002Fproblemas-comuns",[],"10 min",{"title":5,"description":1546},"docs\u002Ftroubleshooting\u002Fproblemas-comuns",[1545,1558,1559,1560],"diagnostico","operacao","incidentes","trl75xa33qE7w5xKyb2Z686fVToUO_2UPafgLvIkpTQ",[1563,1567,1573,1578,1583,1587,1593,1598,1603,1608,1613,1617,1623,1628],{"path":1522,"title":1564,"description":1565,"category":116,"order":44,"icon":1566},"Referência da API REST","Endpoints, autenticação JWT, exemplos com curl e padrões de erro da API do HeroCtl.","i-lucide-code",{"path":1568,"title":1569,"description":1570,"category":1571,"order":44,"icon":1572},"\u002Fdocs\u002Fdeploy\u002Fprimeiro-deploy","Deploy do primeiro app","Suba uma aplicação Node.js com banco Postgres em 50 linhas de YAML. Inclui health check, rolling deploy e rollback.","deploy","i-lucide-rocket",{"path":1574,"title":1575,"description":1576,"category":1571,"order":62,"icon":1577},"\u002Fdocs\u002Fdeploy\u002Frolling-canary-bluegreen","Rolling, canary, blue-green e rainbow","Quatro estratégias de deploy. Quando usar cada uma, com exemplos completos e trade-offs honestos.","i-lucide-git-branch",{"path":471,"title":1579,"description":1580,"category":1581,"order":62,"icon":1582},"Backup e restore do estado do cluster","Como salvar, agendar e restaurar snapshots do plano de controle do HeroCtl. Estratégia de disaster recovery.","observabilidade","i-lucide-archive",{"path":1509,"title":1584,"description":1585,"category":1581,"order":44,"icon":1586},"Métricas e logs","Coleta de métricas, logs e traces sem montar uma pilha de observabilidade externa. Quando vale, e quando integrar com ferramenta de fora.","i-lucide-activity",{"path":1588,"title":1589,"description":1590,"category":1591,"order":69,"icon":1592},"\u002Fdocs\u002Foperacoes\u002Fcomandos-cli","Referência completa do CLI","Todos os comandos heroctl com sinopse, flags e exemplo. Use como cola de mesa.","operacoes","i-lucide-terminal",{"path":1594,"title":1595,"description":1596,"category":1591,"order":44,"icon":1597},"\u002Fdocs\u002Foperacoes\u002Finstalacao","Instalação","Instale o HeroCtl em qualquer servidor Linux com Docker em um único comando. Cobre pré-requisitos, bootstrap e verificação.","i-lucide-download",{"path":1599,"title":1600,"description":1601,"category":1591,"order":231,"icon":1602},"\u002Fdocs\u002Foperacoes\u002Fmulti-region","Multi-region (em planejamento Q4 2026)","O que esperar de multi-region no HeroCtl, como rodar em várias regiões hoje e o roadmap até 2027.","i-lucide-globe",{"path":1604,"title":1605,"description":1606,"category":1591,"order":62,"icon":1607},"\u002Fdocs\u002Foperacoes\u002Fprimeiro-cluster","Subir cluster de 3 nós","Forme um cluster com 3 servidores em menos de 10 minutos. Tolera falha de 1 nó sem indisponibilidade.","i-lucide-network",{"path":334,"title":1609,"description":1610,"category":1611,"order":62,"icon":1612},"Configuração de firewall","Quais portas o HeroCtl usa, quais precisam ficar abertas, e quais nunca deveriam ser expostas à internet.","rede","i-lucide-shield",{"path":1614,"title":1615,"description":1616,"category":1611,"order":44,"icon":1602},"\u002Fdocs\u002Frede\u002Fingress-tls","Ingress e TLS automático","Como expor aplicações pela porta 443 com certificados emitidos e renovados automaticamente, sem operar um roteador externo.",{"path":1618,"title":1619,"description":1620,"category":1621,"order":62,"icon":1622},"\u002Fdocs\u002Fseguranca\u002Frbac","RBAC e controle de acesso (Business+)","Modelo de papéis, políticas e tokens para limitar quem pode submeter, ler e operar o cluster.","seguranca","i-lucide-users",{"path":1624,"title":1625,"description":1626,"category":1621,"order":44,"icon":1627},"\u002Fdocs\u002Fseguranca\u002Fsecrets","Gerenciamento de segredos","Como guardar senhas, tokens e chaves fora do spec do job, com criptografia em repouso e rotação versionada.","i-lucide-key",{"path":1552,"title":5,"description":1546,"category":1545,"order":44,"icon":1549},1777362179674]