hydro_deploy/gcp.rs

use std::any::Any;
use std::collections::HashMap;
use std::fmt::Debug;
use std::sync::{Arc, Mutex, OnceLock};

use anyhow::Result;
use nanoid::nanoid;
use serde_json::json;

use super::terraform::{TERRAFORM_ALPHABET, TerraformOutput, TerraformProvider};
use super::{ClientStrategy, Host, HostTargetType, LaunchedHost, ResourceBatch, ResourceResult};
use crate::ssh::LaunchedSshHost;
use crate::{BaseServerStrategy, HostStrategyGetter, PortNetworkHint};

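/// Handle to a GCP Compute Engine VM that has already been provisioned.
///
/// Holds the SSH user along with the instance's internal IP and, if one was
/// allocated, its external IP; [`LaunchedSshHost`] is implemented so the VM
/// can be reached over SSH.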
pub struct LaunchedComputeEngine {
    resource_result: Arc<ResourceResult>,
    user: String,
    pub internal_ip: String,
    pub external_ip: Option<String>,
}

impl LaunchedSshHost for LaunchedComputeEngine {
    fn get_external_ip(&self) -> Option<String> {
        self.external_ip.clone()
    }

    fn get_internal_ip(&self) -> String {
        self.internal_ip.clone()
    }

    fn get_cloud_provider(&self) -> String {
        "GCP".to_string()
    }

    fn resource_result(&self) -> &Arc<ResourceResult> {
        &self.resource_result
    }

    fn ssh_user(&self) -> &str {
        self.user.as_str()
    }
}

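/// A GCP VPC network shared by the [`GcpComputeEngineHost`]s in a deployment.
///
/// When `existing_vpc` names a network, it is imported as a Terraform data
/// source; otherwise a new auto-subnet network is created together with
/// firewall rules allowing internal traffic and external pings.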
#[derive(Debug)]
pub struct GcpNetwork {
    pub project: String,
    pub existing_vpc: OnceLock<String>,
    id: String,
}

impl GcpNetwork {
    pub fn new(project: impl Into<String>, existing_vpc: Option<String>) -> Arc<Self> {
        Arc::new(Self {
            project: project.into(),
            existing_vpc: existing_vpc.map(From::from).unwrap_or_default(),
            id: nanoid!(8, &TERRAFORM_ALPHABET),
        })
    }

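    /// Registers this network in the Terraform batch, creating it (plus its
    /// default firewall rules) if no existing VPC was supplied, and returns
    /// the Terraform address used to reference the network.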
    fn collect_resources(&self, resource_batch: &mut ResourceBatch) -> String {
        resource_batch
            .terraform
            .terraform
            .required_providers
            .insert(
                "google".to_string(),
                TerraformProvider {
                    source: "hashicorp/google".to_string(),
                    version: "4.53.1".to_string(),
                },
            );

        let vpc_network = format!("hydro-vpc-network-{}", self.id);

        if let Some(existing) = self.existing_vpc.get() {
            if resource_batch
                .terraform
                .resource
                .get("google_compute_network")
                .unwrap_or(&HashMap::new())
                .contains_key(existing)
            {
                format!("google_compute_network.{existing}")
            } else {
                resource_batch
                    .terraform
                    .data
                    .entry("google_compute_network".to_string())
                    .or_default()
                    .insert(
                        vpc_network.clone(),
                        json!({
                            "name": existing,
                            "project": self.project,
                        }),
                    );

                format!("data.google_compute_network.{vpc_network}")
            }
        } else {
            resource_batch
                .terraform
                .resource
                .entry("google_compute_network".to_string())
                .or_default()
                .insert(
                    vpc_network.clone(),
                    json!({
                        "name": vpc_network,
                        "project": self.project,
                        "auto_create_subnetworks": true
                    }),
                );

            let firewall_entries = resource_batch
                .terraform
                .resource
                .entry("google_compute_firewall".to_string())
                .or_default();

            // allow all VMs to communicate with each other over internal IPs
            firewall_entries.insert(
                format!("{vpc_network}-default-allow-internal"),
                json!({
                    "name": format!("{vpc_network}-default-allow-internal"),
                    "project": self.project,
                    "network": format!("${{google_compute_network.{vpc_network}.name}}"),
                    "source_ranges": ["10.128.0.0/9"],
                    "allow": [
                        {
                            "protocol": "tcp",
                            "ports": ["0-65535"]
                        },
                        {
                            "protocol": "udp",
                            "ports": ["0-65535"]
                        },
                        {
                            "protocol": "icmp"
                        }
                    ]
                }),
            );

            // allow external pings to all VMs
            firewall_entries.insert(
                format!("{vpc_network}-default-allow-ping"),
                json!({
                    "name": format!("{vpc_network}-default-allow-ping"),
                    "project": self.project,
                    "network": format!("${{google_compute_network.{vpc_network}.name}}"),
                    "source_ranges": ["0.0.0.0/0"],
                    "allow": [
                        {
                            "protocol": "icmp"
                        }
                    ]
                }),
            );

            let out = format!("google_compute_network.{vpc_network}");
            self.existing_vpc.set(vpc_network).unwrap();
            out
        }
    }
}

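/// A [`Host`] that provisions a GCP Compute Engine VM through Terraform.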
pub struct GcpComputeEngineHost {
    /// ID from [`crate::Deployment::add_host`].
    id: usize,

    project: String,
    machine_type: String,
    image: String,
    target_type: HostTargetType,
    region: String,
    network: Arc<GcpNetwork>,
    user: Option<String>,
    display_name: Option<String>,
    pub launched: OnceLock<Arc<LaunchedComputeEngine>>, // TODO(mingwei): fix pub
    external_ports: Mutex<Vec<u16>>,
}

impl Debug for GcpComputeEngineHost {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "GcpComputeEngineHost({} ({:?}))",
            self.id, &self.display_name
        ))
    }
}

impl GcpComputeEngineHost {
    #[expect(clippy::too_many_arguments, reason = "used via builder pattern")]
    pub fn new(
        id: usize,
        project: impl Into<String>,
        machine_type: impl Into<String>,
        image: impl Into<String>,
        target_type: HostTargetType,
        region: impl Into<String>,
        network: Arc<GcpNetwork>,
        user: Option<String>,
        display_name: Option<String>,
    ) -> Self {
        Self {
            id,
            project: project.into(),
            machine_type: machine_type.into(),
            image: image.into(),
            target_type,
            region: region.into(),
            network,
            user,
            display_name,
            launched: OnceLock::new(),
            external_ports: Mutex::new(Vec::new()),
        }
    }
}

impl Host for GcpComputeEngineHost {
    fn target_type(&self) -> HostTargetType {
        self.target_type
    }

    fn request_port_base(&self, bind_type: &BaseServerStrategy) {
        match bind_type {
            BaseServerStrategy::UnixSocket => {}
            BaseServerStrategy::InternalTcpPort(_) => {}
            BaseServerStrategy::ExternalTcpPort(port) => {
                let mut external_ports = self.external_ports.lock().unwrap();
                if !external_ports.contains(port) {
                    if self.launched.get().is_some() {
                        todo!("Cannot adjust firewall after host has been launched");
                    }
                    external_ports.push(*port);
                }
            }
        }
    }

    fn request_custom_binary(&self) {
        self.request_port_base(&BaseServerStrategy::ExternalTcpPort(22));
    }

    fn id(&self) -> usize {
        self.id
    }

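    // Emit the Terraform resources needed for this VM: the google/local/tls
    // providers, a shared SSH key, firewall rules for any requested external
    // ports, the compute instance itself, and outputs for its IP addresses.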
    fn collect_resources(&self, resource_batch: &mut ResourceBatch) {
        if self.launched.get().is_some() {
            return;
        }

        let vpc_path = self.network.collect_resources(resource_batch);

        let project = self.project.as_str();

        // first, we import the providers we need
        resource_batch
            .terraform
            .terraform
            .required_providers
            .insert(
                "google".to_string(),
                TerraformProvider {
                    source: "hashicorp/google".to_string(),
                    version: "4.53.1".to_string(),
                },
            );

        resource_batch
            .terraform
            .terraform
            .required_providers
            .insert(
                "local".to_string(),
                TerraformProvider {
                    source: "hashicorp/local".to_string(),
                    version: "2.3.0".to_string(),
                },
            );

        resource_batch
            .terraform
            .terraform
            .required_providers
            .insert(
                "tls".to_string(),
                TerraformProvider {
                    source: "hashicorp/tls".to_string(),
                    version: "4.0.4".to_string(),
                },
            );

        // we use a single SSH key for all VMs
        resource_batch
            .terraform
            .resource
            .entry("tls_private_key".to_string())
            .or_default()
            .insert(
                "vm_instance_ssh_key".to_string(),
                json!({
                    "algorithm": "RSA",
                    "rsa_bits": 4096
                }),
            );

        resource_batch
            .terraform
            .resource
            .entry("local_file".to_string())
            .or_default()
            .insert(
                "vm_instance_ssh_key_pem".to_string(),
                json!({
                    "content": "${tls_private_key.vm_instance_ssh_key.private_key_pem}",
                    "filename": ".ssh/vm_instance_ssh_key_pem",
                    "file_permission": "0600"
                }),
            );

        let vm_key = format!("vm-instance-{}", self.id);
        let mut vm_name = format!("hydro-vm-instance-{}", nanoid!(8, &TERRAFORM_ALPHABET));
        // Name must match regex: (?:[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?), max length = 63 (61 + 1 a-z before and after)
        if let Some(mut display_name) = self.display_name.clone() {
            vm_name.push('-');
            display_name = display_name
                .replace("_", "-")
                .replace(":", "-")
                .to_lowercase();

            // Keep the latter half of display_name if it is too long
            let num_chars_to_cut = vm_name.len() + display_name.len() - 63;
            if num_chars_to_cut > 0 {
                display_name.drain(0..num_chars_to_cut);
            }
            vm_name.push_str(&display_name);
        }

        let mut tags = vec![];
        let mut external_interfaces = vec![];

        let external_ports = self.external_ports.lock().unwrap();
        if external_ports.is_empty() {
            external_interfaces.push(json!({ "network": format!("${{{vpc_path}.self_link}}") }));
        } else {
            external_interfaces.push(json!({
                "network": format!("${{{vpc_path}.self_link}}"),
                "access_config": [
                    {
                        "network_tier": "STANDARD"
                    }
                ]
            }));

            // open the external ports that were requested
            let my_external_tags = external_ports.iter().map(|port| {
                let rule_id = nanoid!(8, &TERRAFORM_ALPHABET);
                let firewall_rule = resource_batch
                    .terraform
                    .resource
                    .entry("google_compute_firewall".to_string())
                    .or_default()
                    .entry(format!("open-external-port-{}", port))
                    .or_insert(json!({
                        "name": format!("open-external-port-{}-{}", port, rule_id),
                        "project": project,
                        "network": format!("${{{vpc_path}.name}}"),
                        "target_tags": [format!("open-external-port-tag-{}-{}", port, rule_id)],
                        "source_ranges": ["0.0.0.0/0"],
                        "allow": [
                            {
                                "protocol": "tcp",
                                "ports": vec![port.to_string()]
                            }
                        ]
                    }));

                firewall_rule["target_tags"].as_array().unwrap()[0].clone()
            });

            tags.extend(my_external_tags);

            resource_batch.terraform.output.insert(
                format!("{vm_key}-public-ip"),
                TerraformOutput {
                    value: format!("${{google_compute_instance.{vm_key}.network_interface[0].access_config[0].nat_ip}}")
                }
            );
        }
        drop(external_ports); // Drop the lock as soon as possible.

        let user = self.user.as_deref().unwrap_or("hydro");
        resource_batch
            .terraform
            .resource
            .entry("google_compute_instance".to_string())
            .or_default()
            .insert(
                vm_key.clone(),
                json!({
                    "name": vm_name,
                    "project": project,
                    "machine_type": self.machine_type,
                    "zone": self.region,
                    "tags": tags,
                    "metadata": {
                        "ssh-keys": format!("{user}:${{tls_private_key.vm_instance_ssh_key.public_key_openssh}}")
                    },
                    "boot_disk": [
                        {
                            "initialize_params": [
                                {
                                    "image": self.image
                                }
                            ]
                        }
                    ],
                    "network_interface": external_interfaces,
                }),
            );

        resource_batch.terraform.output.insert(
            format!("{vm_key}-internal-ip"),
            TerraformOutput {
                value: format!(
                    "${{google_compute_instance.{vm_key}.network_interface[0].network_ip}}"
                ),
            },
        );
    }

    fn launched(&self) -> Option<Arc<dyn LaunchedHost>> {
        self.launched
            .get()
            .map(|a| a.clone() as Arc<dyn LaunchedHost>)
    }

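    // Reads this VM's IP addresses back out of the Terraform outputs and wraps
    // them in a `LaunchedComputeEngine`; the result is cached in `self.launched`.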
    fn provision(&self, resource_result: &Arc<ResourceResult>) -> Arc<dyn LaunchedHost> {
        self.launched
            .get_or_init(|| {
                let id = self.id;

                let internal_ip = resource_result
                    .terraform
                    .outputs
                    .get(&format!("vm-instance-{id}-internal-ip"))
                    .unwrap()
                    .value
                    .clone();

                let external_ip = resource_result
                    .terraform
                    .outputs
                    .get(&format!("vm-instance-{id}-public-ip"))
                    .map(|v| v.value.clone());

                Arc::new(LaunchedComputeEngine {
                    resource_result: resource_result.clone(),
                    user: self.user.as_ref().cloned().unwrap_or("hydro".to_string()),
                    internal_ip,
                    external_ip,
                })
            })
            .clone()
    }

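    // Prefer, in order: a Unix socket on the same host, a direct internal TCP
    // connection (same GCP project), or an SSH-forwarded TCP port as a fallback.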
    fn strategy_as_server<'a>(
        &'a self,
        client_host: &dyn Host,
        network_hint: PortNetworkHint,
    ) -> Result<(ClientStrategy<'a>, HostStrategyGetter)> {
        if matches!(network_hint, PortNetworkHint::Auto)
            && client_host.can_connect_to(ClientStrategy::UnixSocket(self.id))
        {
            Ok((
                ClientStrategy::UnixSocket(self.id),
                Box::new(|_| BaseServerStrategy::UnixSocket),
            ))
        } else if matches!(
            network_hint,
            PortNetworkHint::Auto | PortNetworkHint::TcpPort(_)
        ) && client_host.can_connect_to(ClientStrategy::InternalTcpPort(self))
        {
            Ok((
                ClientStrategy::InternalTcpPort(self),
                Box::new(move |_| {
                    BaseServerStrategy::InternalTcpPort(match network_hint {
                        PortNetworkHint::Auto => None,
                        PortNetworkHint::TcpPort(port) => port,
                    })
                }),
            ))
        } else if matches!(network_hint, PortNetworkHint::Auto)
            && client_host.can_connect_to(ClientStrategy::ForwardedTcpPort(self))
        {
            Ok((
                ClientStrategy::ForwardedTcpPort(self),
                Box::new(|me| {
                    me.downcast_ref::<GcpComputeEngineHost>()
                        .unwrap()
                        .request_port_base(&BaseServerStrategy::ExternalTcpPort(22)); // needed to forward
                    BaseServerStrategy::InternalTcpPort(None)
                }),
            ))
        } else {
            anyhow::bail!("Could not find a strategy to connect to GCP instance")
        }
    }

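    // As a client, this host can reach a Unix socket only on itself and an
    // internal TCP port only within the same GCP project; it does not connect
    // out through forwarded TCP ports.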
    fn can_connect_to(&self, typ: ClientStrategy) -> bool {
        match typ {
            ClientStrategy::UnixSocket(id) => {
                #[cfg(unix)]
                {
                    self.id == id
                }

                #[cfg(not(unix))]
                {
                    let _ = id;
                    false
                }
            }
            ClientStrategy::InternalTcpPort(target_host) => {
                if let Some(gcp_target) =
                    <dyn Any>::downcast_ref::<GcpComputeEngineHost>(target_host)
                {
                    self.project == gcp_target.project
                } else {
                    false
                }
            }
            ClientStrategy::ForwardedTcpPort(_) => false,
        }
    }
}